{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "8839a2ee-257d-4182-8c82-01d012d8f888",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Execute TRANSFAC enrichment analysis based on co-eqtl results"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26bafe98-a70d-4052-ba5d-fca1b4115633",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "946a1c00-83e8-4260-9093-e79e373c1fe0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Run the shared library/setup script for this analysis.\n",
    "# NOTE(review): presumably attaches data.table (fread is called below) -- confirm.\n",
    "source('MS1_Libraries.r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb04a3b3-e6f5-4458-8b8e-c925813cee89",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "6b9b799d-d266-4fc8-a49b-ebb550fa64fd",
   "metadata": {},
   "source": [
    "# Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "35c8b11a-e882-4bbb-9706-6c13b9c522f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Path to input data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "c517df12-ff3c-492e-a0a3-53d7cbdb945d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input location; file names are appended to it with paste0(), so a non-empty\n",
    "# value must end with a path separator (\"\" resolves against the working directory).\n",
    "path<-\"\"\n",
    "# NOTE(review): presumably the directory results are written to -- confirm.\n",
    "outdir<-\"\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "309ead5e-4bbd-4370-8089-5dfa0c53a194",
   "metadata": {},
   "source": [
    "# Data "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "759d077f-409c-4e52-857c-47af7be21134",
   "metadata": {},
   "source": [
    "## Enrichment Data Input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "8dfbd479-4b4a-4abd-828b-c668708fa7e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Exemplary data input load for a cell-type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "4bbb56ca-273c-42dd-857c-be3d358ded78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cell type(s) to load: the loader loop reads\n",
    "# UT_<cell_type>_coeqtls_fullresults_fixed.all.tsv.gz for every entry.\n",
    "cell_type_var = \"CD4T\"\n",
    "# NOTE(review): presumably the full set of available cell types -- confirm:\n",
    "# c(\"CD4T\",\"CD8T\",\"monocyte\",\"NK\",\"B\",\"DC\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "54fb40a9-c9c6-4f88-9ab8-fc98fa60d279",
   "metadata": {},
   "outputs": [],
   "source": [
    "for(cell_type in cell_type_var){\n",
    "\n",
    "  # Read the full co-eQTL result table of this cell type.\n",
    "  # NOTE: coeqtls is reassigned on every iteration, so after the loop it\n",
    "  # holds only the table of the last entry of cell_type_var.\n",
    "  coeqtls <- fread(paste0(path, \"UT_\",cell_type,\n",
    "                         \"_coeqtls_fullresults_fixed.all.tsv.gz\"))\n",
    "\n",
    "  # Gene is a \"geneA;geneB\" pair: gene1 = text before the first ';',\n",
    "  # gene2 = text after the last ';'.\n",
    "  coeqtls$gene1<-gsub(\";.*\",\"\",coeqtls$Gene)\n",
    "  coeqtls$gene2<-gsub(\".*;\",\"\",coeqtls$Gene)\n",
    "\n",
    "  # second_gene = the member of the pair that is not the eQTL gene.\n",
    "  # The full column name eqtlgene is required here: a truncated name such as\n",
    "  # eqtlgen only resolves through partial matching of `$` and silently\n",
    "  # breaks once another column shares that prefix.\n",
    "  coeqtls$second_gene<-ifelse(coeqtls$gene1 == coeqtls$eqtlgene, coeqtls$gene2,\n",
    "                        coeqtls$gene1)\n",
    "\n",
    "  # Drop the temporary helper columns.\n",
    "  coeqtls$gene1<-NULL\n",
    "  coeqtls$gene2<-NULL\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "7e950293-2939-4bd8-905d-729dd8c1a278",
   "metadata": {},
   "outputs": [],
   "source": [
    "#unique(coeqtls$eqtlgene)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "78dd2696-0f6c-40a8-93c4-0f1d6b3b99a0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "372"
      ],
      "text/latex": [
       "372"
      ],
      "text/markdown": [
       "372"
      ],
      "text/plain": [
       "[1] 372"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# validity check --> 372 significant co-egenes for RPS26\n",
    "nrow(coeqtls[\n",
    "    (coeqtls$eqtlgene == 'RPS26') & (coeqtls$gene2_isSig == TRUE),\n",
    "    c('eqtlgene', 'second_gene')\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "1535e95a-ec90-42e9-a5a7-de98dea38ea8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "742"
      ],
      "text/latex": [
       "742"
      ],
      "text/markdown": [
       "742"
      ],
      "text/plain": [
       "[1] 742"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nrow(coeqtls[(coeqtls$eqtlgene ==  'RPS26'),c('eqtlgene', 'second_gene')])\n",
    "# overall 742 --> those that would not have been tested"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "599506b1-e79b-4c3a-9c38-b32dd14bf5ad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.table: 2 × 38</caption>\n",
       "<thead>\n",
       "\t<tr><th scope=col>snp_genepair</th><th scope=col>Gene</th><th scope=col>GeneChr</th><th scope=col>GenePos</th><th scope=col>GeneStrand</th><th scope=col>GeneSymbol</th><th scope=col>SNP</th><th scope=col>SNPChr</th><th scope=col>SNPPos</th><th scope=col>SNPAlleles</th><th scope=col>⋯</th><th scope=col>multipletestP</th><th scope=col>eqtlgene</th><th scope=col>snp_eqtlgene</th><th scope=col>snp_beta_shape1</th><th scope=col>snp_beta_shape2</th><th scope=col>snp_pvalbeta</th><th scope=col>snp_qval</th><th scope=col>gene2_pthreshold</th><th scope=col>gene2_isSig</th><th scope=col>second_gene</th></tr>\n",
       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>⋯</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><td>rs11587831_C1orf86;NUDT22</td><td>C1orf86;NUDT22</td><td>1</td><td>2115903</td><td>NA</td><td>C1orf86;NUDT22</td><td>rs11587831</td><td>1</td><td>2110848</td><td>T/G</td><td>⋯</td><td>0.6354470</td><td>C1orf86</td><td>rs11587831_C1orf86</td><td>1.197903</td><td>127.555</td><td>0.5044989</td><td>0.7012273</td><td>4.539067e-05</td><td>FALSE</td><td>NUDT22</td></tr>\n",
       "\t<tr><td>rs11587831_C1orf86;SDHC  </td><td>C1orf86;SDHC  </td><td>1</td><td>2115903</td><td>NA</td><td>C1orf86;SDHC  </td><td>rs11587831</td><td>1</td><td>2110848</td><td>T/G</td><td>⋯</td><td>0.9144163</td><td>C1orf86</td><td>rs11587831_C1orf86</td><td>1.197903</td><td>127.555</td><td>0.5044989</td><td>0.7012273</td><td>4.539067e-05</td><td>FALSE</td><td>SDHC  </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.table: 2 × 38\n",
       "\\begin{tabular}{lllllllllllllllllllll}\n",
       " snp\\_genepair & Gene & GeneChr & GenePos & GeneStrand & GeneSymbol & SNP & SNPChr & SNPPos & SNPAlleles & ⋯ & multipletestP & eqtlgene & snp\\_eqtlgene & snp\\_beta\\_shape1 & snp\\_beta\\_shape2 & snp\\_pvalbeta & snp\\_qval & gene2\\_pthreshold & gene2\\_isSig & second\\_gene\\\\\n",
       " <chr> & <chr> & <int> & <int> & <lgl> & <chr> & <chr> & <int> & <int> & <chr> & ⋯ & <dbl> & <chr> & <chr> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t rs11587831\\_C1orf86;NUDT22 & C1orf86;NUDT22 & 1 & 2115903 & NA & C1orf86;NUDT22 & rs11587831 & 1 & 2110848 & T/G & ⋯ & 0.6354470 & C1orf86 & rs11587831\\_C1orf86 & 1.197903 & 127.555 & 0.5044989 & 0.7012273 & 4.539067e-05 & FALSE & NUDT22\\\\\n",
       "\t rs11587831\\_C1orf86;SDHC   & C1orf86;SDHC   & 1 & 2115903 & NA & C1orf86;SDHC   & rs11587831 & 1 & 2110848 & T/G & ⋯ & 0.9144163 & C1orf86 & rs11587831\\_C1orf86 & 1.197903 & 127.555 & 0.5044989 & 0.7012273 & 4.539067e-05 & FALSE & SDHC  \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.table: 2 × 38\n",
       "\n",
       "| snp_genepair &lt;chr&gt; | Gene &lt;chr&gt; | GeneChr &lt;int&gt; | GenePos &lt;int&gt; | GeneStrand &lt;lgl&gt; | GeneSymbol &lt;chr&gt; | SNP &lt;chr&gt; | SNPChr &lt;int&gt; | SNPPos &lt;int&gt; | SNPAlleles &lt;chr&gt; | ⋯ ⋯ | multipletestP &lt;dbl&gt; | eqtlgene &lt;chr&gt; | snp_eqtlgene &lt;chr&gt; | snp_beta_shape1 &lt;dbl&gt; | snp_beta_shape2 &lt;dbl&gt; | snp_pvalbeta &lt;dbl&gt; | snp_qval &lt;dbl&gt; | gene2_pthreshold &lt;dbl&gt; | gene2_isSig &lt;lgl&gt; | second_gene &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| rs11587831_C1orf86;NUDT22 | C1orf86;NUDT22 | 1 | 2115903 | NA | C1orf86;NUDT22 | rs11587831 | 1 | 2110848 | T/G | ⋯ | 0.6354470 | C1orf86 | rs11587831_C1orf86 | 1.197903 | 127.555 | 0.5044989 | 0.7012273 | 4.539067e-05 | FALSE | NUDT22 |\n",
       "| rs11587831_C1orf86;SDHC   | C1orf86;SDHC   | 1 | 2115903 | NA | C1orf86;SDHC   | rs11587831 | 1 | 2110848 | T/G | ⋯ | 0.9144163 | C1orf86 | rs11587831_C1orf86 | 1.197903 | 127.555 | 0.5044989 | 0.7012273 | 4.539067e-05 | FALSE | SDHC   |\n",
       "\n"
      ],
      "text/plain": [
       "  snp_genepair              Gene           GeneChr GenePos GeneStrand\n",
       "1 rs11587831_C1orf86;NUDT22 C1orf86;NUDT22 1       2115903 NA        \n",
       "2 rs11587831_C1orf86;SDHC   C1orf86;SDHC   1       2115903 NA        \n",
       "  GeneSymbol     SNP        SNPChr SNPPos  SNPAlleles ⋯ multipletestP eqtlgene\n",
       "1 C1orf86;NUDT22 rs11587831 1      2110848 T/G        ⋯ 0.6354470     C1orf86 \n",
       "2 C1orf86;SDHC   rs11587831 1      2110848 T/G        ⋯ 0.9144163     C1orf86 \n",
       "  snp_eqtlgene       snp_beta_shape1 snp_beta_shape2 snp_pvalbeta snp_qval \n",
       "1 rs11587831_C1orf86 1.197903        127.555         0.5044989    0.7012273\n",
       "2 rs11587831_C1orf86 1.197903        127.555         0.5044989    0.7012273\n",
       "  gene2_pthreshold gene2_isSig second_gene\n",
       "1 4.539067e-05     FALSE       NUDT22     \n",
       "2 4.539067e-05     FALSE       SDHC       "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(coeqtls,2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1a2e697-badf-4802-ad22-26a5ac2a9101",
   "metadata": {},
   "source": [
    "## ReMap Results for comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "128f1402-b5bf-411d-b812-5dbed446f4a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Load supplementary table (with ReMap Results to compare):\n",
    "# \"supptable15.TFenrichment_co-eGenes.xlsx - Sheet1.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "4c21cd7a-c9da-4c2b-8877-bac38c095960",
   "metadata": {},
   "outputs": [],
   "source": [
    "# read.csv() is called with its default check.names = TRUE, so the sheet headers\n",
    "# are converted to syntactic R names (e.g. eQTL..SNP.eGene., enrichment.fdr).\n",
    "old_enrichments = read.csv( paste0(path, \"supptable15.TFenrichment_co-eGenes.xlsx - Sheet1.csv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "8b28f114-298e-4170-9e98-0644683d93fd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "963"
      ],
      "text/latex": [
       "963"
      ],
      "text/markdown": [
       "963"
      ],
      "text/plain": [
       "[1] 963"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nrow(old_enrichments)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "f159bb08-15b4-4a74-86a0-dd723fcc83b0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 2 × 13</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>Cell.type</th><th scope=col>eQTL..SNP.eGene.</th><th scope=col>TF</th><th scope=col>TF.is.a.co.eGene.</th><th scope=col>enrichment.p.value</th><th scope=col>X..TF.overlap...co.eGene</th><th scope=col>X..TF.overlap...background</th><th scope=col>X..no.TF.overlap...co.eGene</th><th scope=col>X..background.gene...not.co.eGene</th><th scope=col>enrichment.fdr</th><th scope=col>eQTL.SNP</th><th scope=col>SNP.overlaps.TF.</th><th scope=col>Names.of.overlapping.SNPs</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>CDK8   </td><td>FALSE</td><td>9.630369e-06</td><td>14</td><td>5</td><td>2778</td><td> 8515</td><td>1.640373e-03</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "\t<tr><th scope=row>2</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>SNRNP70</td><td>FALSE</td><td>1.209254e-09</td><td>11</td><td>8</td><td> 649</td><td>10644</td><td>6.179288e-07</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 2 × 13\n",
       "\\begin{tabular}{r|lllllllllllll}\n",
       "  & Cell.type & eQTL..SNP.eGene. & TF & TF.is.a.co.eGene. & enrichment.p.value & X..TF.overlap...co.eGene & X..TF.overlap...background & X..no.TF.overlap...co.eGene & X..background.gene...not.co.eGene & enrichment.fdr & eQTL.SNP & SNP.overlaps.TF. & Names.of.overlapping.SNPs\\\\\n",
       "  & <chr> & <chr> & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <int> & <dbl> & <chr> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t1 & CD4T & rs111454690\\_HLA-DRB5 & CDK8    & FALSE & 9.630369e-06 & 14 & 5 & 2778 &  8515 & 1.640373e-03 & rs111454690 & FALSE & \\\\\n",
       "\t2 & CD4T & rs111454690\\_HLA-DRB5 & SNRNP70 & FALSE & 1.209254e-09 & 11 & 8 &  649 & 10644 & 6.179288e-07 & rs111454690 & FALSE & \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 2 × 13\n",
       "\n",
       "| <!--/--> | Cell.type &lt;chr&gt; | eQTL..SNP.eGene. &lt;chr&gt; | TF &lt;chr&gt; | TF.is.a.co.eGene. &lt;lgl&gt; | enrichment.p.value &lt;dbl&gt; | X..TF.overlap...co.eGene &lt;int&gt; | X..TF.overlap...background &lt;int&gt; | X..no.TF.overlap...co.eGene &lt;int&gt; | X..background.gene...not.co.eGene &lt;int&gt; | enrichment.fdr &lt;dbl&gt; | eQTL.SNP &lt;chr&gt; | SNP.overlaps.TF. &lt;lgl&gt; | Names.of.overlapping.SNPs &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | CD4T | rs111454690_HLA-DRB5 | CDK8    | FALSE | 9.630369e-06 | 14 | 5 | 2778 |  8515 | 1.640373e-03 | rs111454690 | FALSE | <!----> |\n",
       "| 2 | CD4T | rs111454690_HLA-DRB5 | SNRNP70 | FALSE | 1.209254e-09 | 11 | 8 |  649 | 10644 | 6.179288e-07 | rs111454690 | FALSE | <!----> |\n",
       "\n"
      ],
      "text/plain": [
       "  Cell.type eQTL..SNP.eGene.     TF      TF.is.a.co.eGene. enrichment.p.value\n",
       "1 CD4T      rs111454690_HLA-DRB5 CDK8    FALSE             9.630369e-06      \n",
       "2 CD4T      rs111454690_HLA-DRB5 SNRNP70 FALSE             1.209254e-09      \n",
       "  X..TF.overlap...co.eGene X..TF.overlap...background\n",
       "1 14                       5                         \n",
       "2 11                       8                         \n",
       "  X..no.TF.overlap...co.eGene X..background.gene...not.co.eGene enrichment.fdr\n",
       "1 2778                         8515                             1.640373e-03  \n",
       "2  649                        10644                             6.179288e-07  \n",
       "  eQTL.SNP    SNP.overlaps.TF. Names.of.overlapping.SNPs\n",
       "1 rs111454690 FALSE                                     \n",
       "2 rs111454690 FALSE                                     "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(old_enrichments,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "c7fc8d4c-f820-4793-b58d-b194e56d6b4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Check out some results of ReMap mentioned in paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "4b439eaa-c15a-4917-9ae8-4df0afd7475a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "0.0132347204977557"
      ],
      "text/latex": [
       "0.0132347204977557"
      ],
      "text/markdown": [
       "0.0132347204977557"
      ],
      "text/plain": [
       "[1] 0.01323472"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "max(old_enrichments$enrichment.p.value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "id": "7899b194-8054-4a2a-b9d7-3e051f7bc380",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "0.0498313855060291"
      ],
      "text/latex": [
       "0.0498313855060291"
      ],
      "text/markdown": [
       "0.0498313855060291"
      ],
      "text/plain": [
       "[1] 0.04983139"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "max(old_enrichments$enrichment.fdr)\n",
    "# check to use same cut-off for TRANSFAC  --> 0.05"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "384f4ce7-13c5-4230-a0d7-5aaa262d1112",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".list-inline {list-style: none; margin:0; padding: 0}\n",
       ".list-inline>li {display: inline-block}\n",
       ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
       "</style>\n",
       "<ol class=list-inline><li>'rs111454690_HLA-DRB5'</li><li>'rs1131017_RPS26'</li><li>'rs4147638_SMDT1'</li><li>'rs7605824_SH3YL1'</li><li>'rs7632486_CMTM8'</li><li>'rs9271520_HLA-DQA2'</li><li>'rs1131017_RPS26_positive'</li><li>'rs1131017_RPS26_negative'</li><li>'rs6708265_PASK'</li></ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'rs111454690\\_HLA-DRB5'\n",
       "\\item 'rs1131017\\_RPS26'\n",
       "\\item 'rs4147638\\_SMDT1'\n",
       "\\item 'rs7605824\\_SH3YL1'\n",
       "\\item 'rs7632486\\_CMTM8'\n",
       "\\item 'rs9271520\\_HLA-DQA2'\n",
       "\\item 'rs1131017\\_RPS26\\_positive'\n",
       "\\item 'rs1131017\\_RPS26\\_negative'\n",
       "\\item 'rs6708265\\_PASK'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'rs111454690_HLA-DRB5'\n",
       "2. 'rs1131017_RPS26'\n",
       "3. 'rs4147638_SMDT1'\n",
       "4. 'rs7605824_SH3YL1'\n",
       "5. 'rs7632486_CMTM8'\n",
       "6. 'rs9271520_HLA-DQA2'\n",
       "7. 'rs1131017_RPS26_positive'\n",
       "8. 'rs1131017_RPS26_negative'\n",
       "9. 'rs6708265_PASK'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"rs111454690_HLA-DRB5\"     \"rs1131017_RPS26\"         \n",
       "[3] \"rs4147638_SMDT1\"          \"rs7605824_SH3YL1\"        \n",
       "[5] \"rs7632486_CMTM8\"          \"rs9271520_HLA-DQA2\"      \n",
       "[7] \"rs1131017_RPS26_positive\" \"rs1131017_RPS26_negative\"\n",
       "[9] \"rs6708265_PASK\"          "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "unique(old_enrichments[,c( 'eQTL..SNP.eGene.')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "id": "6810f798-9fa5-4aea-9954-b517aa0b49b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "9"
      ],
      "text/latex": [
       "9"
      ],
      "text/markdown": [
       "9"
      ],
      "text/plain": [
       "[1] 9"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Number of distinct eQTL labels; subtract the separate positive and negative case\n",
    "# for RPS26 --> yields the 7 mentioned in paper for which there were significant TF enrichments\n",
    "length(unique(old_enrichments[,c( 'eQTL..SNP.eGene.')]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "id": "528cc4ad-2052-42b3-a6a4-b47bcfaa2bbb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".list-inline {list-style: none; margin:0; padding: 0}\n",
       ".list-inline>li {display: inline-block}\n",
       ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
       "</style>\n",
       "<ol class=list-inline><li>'rs1131017_RPS26'</li><li>'rs4147638_SMDT1'</li><li>'rs7605824_SH3YL1'</li><li>'rs9271520_HLA-DQA2'</li><li>'rs1131017_RPS26_positive'</li><li>'rs1131017_RPS26_negative'</li></ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'rs1131017\\_RPS26'\n",
       "\\item 'rs4147638\\_SMDT1'\n",
       "\\item 'rs7605824\\_SH3YL1'\n",
       "\\item 'rs9271520\\_HLA-DQA2'\n",
       "\\item 'rs1131017\\_RPS26\\_positive'\n",
       "\\item 'rs1131017\\_RPS26\\_negative'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'rs1131017_RPS26'\n",
       "2. 'rs4147638_SMDT1'\n",
       "3. 'rs7605824_SH3YL1'\n",
       "4. 'rs9271520_HLA-DQA2'\n",
       "5. 'rs1131017_RPS26_positive'\n",
       "6. 'rs1131017_RPS26_negative'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"rs1131017_RPS26\"          \"rs4147638_SMDT1\"         \n",
       "[3] \"rs7605824_SH3YL1\"         \"rs9271520_HLA-DQA2\"      \n",
       "[5] \"rs1131017_RPS26_positive\" \"rs1131017_RPS26_negative\""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distinct eQTL labels among the rows flagged SNP.overlaps.TF.; RPS26 is listed three\n",
    "# times (plain, _positive, _negative), so the labels collapse to the 4 pairs mentioned in paper\n",
    "unique(old_enrichments[old_enrichments$SNP.overlaps.TF. == TRUE,c('eQTL..SNP.eGene.')])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "id": "1e28ceb8-6515-4561-ac51-f1ab0860ad36",
   "metadata": {},
   "outputs": [],
   "source": [
    "## rs1131017–RPS26 examples: RBM39, TCF7, LEF1, KLF6, CD74, MAF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "id": "0a75dc32-fc85-4e53-ad4a-fa630f0460d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 7 × 13</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>Cell.type</th><th scope=col>eQTL..SNP.eGene.</th><th scope=col>TF</th><th scope=col>TF.is.a.co.eGene.</th><th scope=col>enrichment.p.value</th><th scope=col>X..TF.overlap...co.eGene</th><th scope=col>X..TF.overlap...background</th><th scope=col>X..no.TF.overlap...co.eGene</th><th scope=col>X..background.gene...not.co.eGene</th><th scope=col>enrichment.fdr</th><th scope=col>eQTL.SNP</th><th scope=col>SNP.overlaps.TF.</th><th scope=col>Names.of.overlapping.SNPs</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>19</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>MAF  </td><td>TRUE</td><td>3.654557e-06</td><td> 92</td><td>280</td><td>1747</td><td>9546</td><td>5.187441e-05</td><td>rs1131017</td><td>TRUE</td><td>rs1131017                     </td></tr>\n",
       "\t<tr><th scope=row>34</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>RBM39</td><td>TRUE</td><td>2.128100e-06</td><td>244</td><td>128</td><td>6041</td><td>5252</td><td>3.295330e-05</td><td>rs1131017</td><td>TRUE</td><td>rs10876864,rs1131017,rs7297175</td></tr>\n",
       "\t<tr><th scope=row>50</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>TCF7 </td><td>TRUE</td><td>7.468026e-03</td><td>134</td><td>238</td><td>3379</td><td>7914</td><td>3.052929e-02</td><td>rs1131017</td><td>TRUE</td><td>rs1131017                     </td></tr>\n",
       "\t<tr><th scope=row>84</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>LEF1 </td><td>TRUE</td><td>4.859147e-05</td><td>153</td><td>219</td><td>3529</td><td>7764</td><td>4.598193e-04</td><td>rs1131017</td><td>TRUE</td><td>rs10876864,rs1131017          </td></tr>\n",
       "\t<tr><th scope=row>116</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>KLF6 </td><td>TRUE</td><td>1.597304e-03</td><td>139</td><td>233</td><td>3385</td><td>7908</td><td>8.236538e-03</td><td>rs1131017</td><td>TRUE</td><td>rs10876864,rs1131017,rs7297175</td></tr>\n",
       "\t<tr><th scope=row>119</th><td>CD4T    </td><td>rs1131017_RPS26</td><td>CD74 </td><td>TRUE</td><td>3.954534e-06</td><td>172</td><td>200</td><td>3915</td><td>7378</td><td>5.461532e-05</td><td>rs1131017</td><td>TRUE</td><td>rs1131017                     </td></tr>\n",
       "\t<tr><th scope=row>730</th><td>monocyte</td><td>rs1131017_RPS26</td><td>CD74 </td><td>TRUE</td><td>7.422301e-03</td><td> 63</td><td> 69</td><td>3526</td><td>6028</td><td>3.134542e-02</td><td>rs1131017</td><td>TRUE</td><td>rs1131017                     </td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 7 × 13\n",
       "\\begin{tabular}{r|lllllllllllll}\n",
       "  & Cell.type & eQTL..SNP.eGene. & TF & TF.is.a.co.eGene. & enrichment.p.value & X..TF.overlap...co.eGene & X..TF.overlap...background & X..no.TF.overlap...co.eGene & X..background.gene...not.co.eGene & enrichment.fdr & eQTL.SNP & SNP.overlaps.TF. & Names.of.overlapping.SNPs\\\\\n",
       "  & <chr> & <chr> & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <int> & <dbl> & <chr> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t19 & CD4T     & rs1131017\\_RPS26 & MAF   & TRUE & 3.654557e-06 &  92 & 280 & 1747 & 9546 & 5.187441e-05 & rs1131017 & TRUE & rs1131017                     \\\\\n",
       "\t34 & CD4T     & rs1131017\\_RPS26 & RBM39 & TRUE & 2.128100e-06 & 244 & 128 & 6041 & 5252 & 3.295330e-05 & rs1131017 & TRUE & rs10876864,rs1131017,rs7297175\\\\\n",
       "\t50 & CD4T     & rs1131017\\_RPS26 & TCF7  & TRUE & 7.468026e-03 & 134 & 238 & 3379 & 7914 & 3.052929e-02 & rs1131017 & TRUE & rs1131017                     \\\\\n",
       "\t84 & CD4T     & rs1131017\\_RPS26 & LEF1  & TRUE & 4.859147e-05 & 153 & 219 & 3529 & 7764 & 4.598193e-04 & rs1131017 & TRUE & rs10876864,rs1131017          \\\\\n",
       "\t116 & CD4T     & rs1131017\\_RPS26 & KLF6  & TRUE & 1.597304e-03 & 139 & 233 & 3385 & 7908 & 8.236538e-03 & rs1131017 & TRUE & rs10876864,rs1131017,rs7297175\\\\\n",
       "\t119 & CD4T     & rs1131017\\_RPS26 & CD74  & TRUE & 3.954534e-06 & 172 & 200 & 3915 & 7378 & 5.461532e-05 & rs1131017 & TRUE & rs1131017                     \\\\\n",
       "\t730 & monocyte & rs1131017\\_RPS26 & CD74  & TRUE & 7.422301e-03 &  63 &  69 & 3526 & 6028 & 3.134542e-02 & rs1131017 & TRUE & rs1131017                     \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 7 × 13\n",
       "\n",
       "| <!--/--> | Cell.type &lt;chr&gt; | eQTL..SNP.eGene. &lt;chr&gt; | TF &lt;chr&gt; | TF.is.a.co.eGene. &lt;lgl&gt; | enrichment.p.value &lt;dbl&gt; | X..TF.overlap...co.eGene &lt;int&gt; | X..TF.overlap...background &lt;int&gt; | X..no.TF.overlap...co.eGene &lt;int&gt; | X..background.gene...not.co.eGene &lt;int&gt; | enrichment.fdr &lt;dbl&gt; | eQTL.SNP &lt;chr&gt; | SNP.overlaps.TF. &lt;lgl&gt; | Names.of.overlapping.SNPs &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 19 | CD4T     | rs1131017_RPS26 | MAF   | TRUE | 3.654557e-06 |  92 | 280 | 1747 | 9546 | 5.187441e-05 | rs1131017 | TRUE | rs1131017                      |\n",
       "| 34 | CD4T     | rs1131017_RPS26 | RBM39 | TRUE | 2.128100e-06 | 244 | 128 | 6041 | 5252 | 3.295330e-05 | rs1131017 | TRUE | rs10876864,rs1131017,rs7297175 |\n",
       "| 50 | CD4T     | rs1131017_RPS26 | TCF7  | TRUE | 7.468026e-03 | 134 | 238 | 3379 | 7914 | 3.052929e-02 | rs1131017 | TRUE | rs1131017                      |\n",
       "| 84 | CD4T     | rs1131017_RPS26 | LEF1  | TRUE | 4.859147e-05 | 153 | 219 | 3529 | 7764 | 4.598193e-04 | rs1131017 | TRUE | rs10876864,rs1131017           |\n",
       "| 116 | CD4T     | rs1131017_RPS26 | KLF6  | TRUE | 1.597304e-03 | 139 | 233 | 3385 | 7908 | 8.236538e-03 | rs1131017 | TRUE | rs10876864,rs1131017,rs7297175 |\n",
       "| 119 | CD4T     | rs1131017_RPS26 | CD74  | TRUE | 3.954534e-06 | 172 | 200 | 3915 | 7378 | 5.461532e-05 | rs1131017 | TRUE | rs1131017                      |\n",
       "| 730 | monocyte | rs1131017_RPS26 | CD74  | TRUE | 7.422301e-03 |  63 |  69 | 3526 | 6028 | 3.134542e-02 | rs1131017 | TRUE | rs1131017                      |\n",
       "\n"
      ],
      "text/plain": [
       "    Cell.type eQTL..SNP.eGene. TF    TF.is.a.co.eGene. enrichment.p.value\n",
       "19  CD4T      rs1131017_RPS26  MAF   TRUE              3.654557e-06      \n",
       "34  CD4T      rs1131017_RPS26  RBM39 TRUE              2.128100e-06      \n",
       "50  CD4T      rs1131017_RPS26  TCF7  TRUE              7.468026e-03      \n",
       "84  CD4T      rs1131017_RPS26  LEF1  TRUE              4.859147e-05      \n",
       "116 CD4T      rs1131017_RPS26  KLF6  TRUE              1.597304e-03      \n",
       "119 CD4T      rs1131017_RPS26  CD74  TRUE              3.954534e-06      \n",
       "730 monocyte  rs1131017_RPS26  CD74  TRUE              7.422301e-03      \n",
       "    X..TF.overlap...co.eGene X..TF.overlap...background\n",
       "19   92                      280                       \n",
       "34  244                      128                       \n",
       "50  134                      238                       \n",
       "84  153                      219                       \n",
       "116 139                      233                       \n",
       "119 172                      200                       \n",
       "730  63                       69                       \n",
       "    X..no.TF.overlap...co.eGene X..background.gene...not.co.eGene\n",
       "19  1747                        9546                             \n",
       "34  6041                        5252                             \n",
       "50  3379                        7914                             \n",
       "84  3529                        7764                             \n",
       "116 3385                        7908                             \n",
       "119 3915                        7378                             \n",
       "730 3526                        6028                             \n",
       "    enrichment.fdr eQTL.SNP  SNP.overlaps.TF. Names.of.overlapping.SNPs     \n",
       "19  5.187441e-05   rs1131017 TRUE             rs1131017                     \n",
       "34  3.295330e-05   rs1131017 TRUE             rs10876864,rs1131017,rs7297175\n",
       "50  3.052929e-02   rs1131017 TRUE             rs1131017                     \n",
       "84  4.598193e-04   rs1131017 TRUE             rs10876864,rs1131017          \n",
       "116 8.236538e-03   rs1131017 TRUE             rs10876864,rs1131017,rs7297175\n",
       "119 5.461532e-05   rs1131017 TRUE             rs1131017                     \n",
       "730 3.134542e-02   rs1131017 TRUE             rs1131017                     "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# rs1131017_RPS26 rows whose TF is flagged both as a co-eGene and as overlapping the SNP\n",
    "old_enrichments[\n",
    "    (old_enrichments$eQTL..SNP.eGene. %in% c('rs1131017_RPS26')) &\n",
    "    (old_enrichments$TF.is.a.co.eGene. == TRUE) &\n",
    "    (old_enrichments$SNP.overlaps.TF. == TRUE),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "id": "973e7d59-4d0d-4aaa-90a6-5ec718d3cdc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MAF and CD74 only negative effect directions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "41107400-6b96-4024-939d-bffa6790c377",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TMEM176A nothing found with remap"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5c479bb0-aeee-4286-8107-70eaebb4968e",
   "metadata": {},
   "source": [
    "# Run TRANSFAC enrichment for all cell-types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "id": "7ee02d44-c7a6-4c44-8957-794c603da722",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Set parameters for function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "id": "5b6b137d-6865-4bea-bf5d-94bdbf71c96b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Significance threshold handed to gost() as user_threshold\n",
    "p_val_thres <- 0.05"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "id": "aea1119e-9ccb-40e4-a43c-ad3022bf4281",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Multiple-testing correction handed to gost() as correction_method\n",
    "correction_var <- 'fdr'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "id": "52ba5757-2a17-4a25-9975-f73fe9d49888",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Background set used for the enrichment tests:\n",
    "###   FALSE - all genes tested in the cell type\n",
    "###   TRUE  - only the eGene and the genes tested together with the respective eQTL\n",
    "restrict_background_set <- FALSE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "id": "5fd364b9-2b0d-40a6-aadc-6c32925e8a33",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Run enrichments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "id": "33a0acf7-c544-4278-b8e0-60506109d9aa",
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"DC with 58 co-eQTLs\"\n",
      "[1] \"rs7935082_MS4A7\"\n",
      "[1] 6054\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9271520_HLA-DQA2\"\n",
      "[1] 6054\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"CD4T with 500 co-eQTLs\"\n",
      "[1] \"rs111454690_HLA-DRB5\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs1131017_RPS26\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs2741159_KRT1\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs4147638_SMDT1\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs7605824_SH3YL1\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs7632486_CMTM8\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9022_CLN8\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9271520_HLA-DQA2\"\n",
      "[1] 11300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n",
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"CD8T with 420 co-eQTLs\"\n",
      "[1] \"rs1131017_RPS26\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs4147638_SMDT1\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs6708265_PASK\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs7605824_SH3YL1\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9271520_HLA-DQA2\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9306156_PRMT2\"\n",
      "[1] 9579\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"monocyte with 281 co-eQTLs\"\n",
      "[1] \"rs111454690_HLA-DRB5\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs1131017_RPS26\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs11577318_CD52\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs3758833_CTSC\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs4782899_DNAAF1\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs5756736_LGALS2\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs7806458_TMEM176A\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs7806458_TMEM176B\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs9271520_HLA-DQA2\"\n",
      "[1] 9557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"NK with 123 co-eQTLs\"\n",
      "[1] \"rs1131017_RPS26\"\n",
      "[1] 7271\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs12151742_GNLY\"\n",
      "[1] 7271\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"rs62480001_MYOM2\"\n",
      "[1] 7271\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n",
      "No results to show\n",
      "Please make sure that the organism is correct or set significant = FALSE\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] \"B with 35 co-eQTLs\"\n",
      "[1] \"rs1131017_RPS26\"\n",
      "[1] 1729\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Detected custom background input, domain scope is set to 'custom'\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Run a TRANSFAC-only g:Profiler enrichment for one query gene set.\n",
    "#   query_genes: gene names to test\n",
    "#   background:  custom background gene set passed to gost() as custom_bg\n",
    "#   cell_type:   cell type label stored in the cell_type column of the result\n",
    "#   label:       eQTL label stored in the snp_eGene column of the result\n",
    "# Returns the TF result rows annotated with cell_type and snp_eGene, or NULL\n",
    "# if the query is empty or gost() returns NULL (no term passed the threshold).\n",
    "run_tf_enrichment <- function(query_genes, background, cell_type, label){\n",
    "  # Nothing to test, e.g. no coeGene with the requested effect direction\n",
    "  if(length(query_genes) == 0){\n",
    "    return(NULL)\n",
    "  }\n",
    "  enrich_out <- gost(\n",
    "    query_genes,\n",
    "    organism = \"hsapiens\",\n",
    "    ordered_query = FALSE,\n",
    "    multi_query = FALSE,\n",
    "    significant = TRUE,\n",
    "    exclude_iea = FALSE,\n",
    "    measure_underrepresentation = FALSE,\n",
    "    evcodes = FALSE,\n",
    "    correction_method = correction_var,\n",
    "    user_threshold = p_val_thres,\n",
    "    custom_bg = background,\n",
    "    sources = 'TF'   # only do transfac enrichment\n",
    "  )\n",
    "  if(is.null(enrich_out)){\n",
    "    return(NULL)\n",
    "  }\n",
    "  res <- enrich_out$result[enrich_out$result$source == 'TF',]\n",
    "  res$cell_type <- cell_type\n",
    "  res$snp_eGene <- label\n",
    "  res\n",
    "}\n",
    "\n",
    "# Background gene set for one eQTL: all genes tested in the cell type, or\n",
    "# (if restrict_background_set is TRUE) only the eGene and the genes tested\n",
    "# together with this eQTL.\n",
    "select_background <- function(coeqtls, eqtl, all_tested_genes){\n",
    "  if(restrict_background_set == TRUE){\n",
    "    tested_with_eqtl <- coeqtls$snp_eqtlgene == eqtl\n",
    "    # Same eGene column (eqtlgen) as used for second_gene and the full background\n",
    "    return(unique(c(coeqtls$eqtlgen[tested_with_eqtl],\n",
    "                    coeqtls$second_gene[tested_with_eqtl])))\n",
    "  }\n",
    "  all_tested_genes\n",
    "}\n",
    "\n",
    "enrichment<-NULL\n",
    "enrichment_summary<-NULL\n",
    "coegenes_counts_total<-NULL\n",
    "for(cell_type in c(\"DC\",\"CD4T\",\"CD8T\",\"monocyte\",\"NK\",\"B\" )){\n",
    "  # Read in the data\n",
    "  coeqtls <- fread(paste0(path, \"UT_\",cell_type,\n",
    "                         \"_coeqtls_fullresults_fixed.all.tsv.gz\"))\n",
    "  # Gene holds a gene pair separated by ';'; second_gene is the partner that is not the eGene\n",
    "  coeqtls$gene1<-gsub(\";.*\",\"\",coeqtls$Gene)\n",
    "  coeqtls$gene2<-gsub(\".*;\",\"\",coeqtls$Gene)\n",
    "  coeqtls$second_gene<-ifelse(coeqtls$gene1 == coeqtls$eqtlgen, coeqtls$gene2,\n",
    "                        coeqtls$gene1)\n",
    "  coeqtls$gene1<-NULL\n",
    "  coeqtls$gene2<-NULL\n",
    "\n",
    "  # Take all tested genes as background\n",
    "  background_genes  <- union(coeqtls$eqtlgen,coeqtls$second_gene)\n",
    "\n",
    "  coeqtls_sign<-coeqtls[coeqtls$gene2_isSig,]\n",
    "\n",
    "  print(paste(cell_type,\"with\",nrow(coeqtls_sign),\"co-eQTLs\"))\n",
    "\n",
    "  # Identify all eQTLs with at least 5 coeGenes\n",
    "  coegene_count<-coeqtls_sign%>%\n",
    "    group_by(snp_eqtlgene)%>%\n",
    "    summarise(count_coeGenes=n())%>%\n",
    "    filter(count_coeGenes>4)\n",
    "\n",
    "  coegene_count$cell_type<-cell_type\n",
    "  coegenes_counts_total<-rbind(coegenes_counts_total,\n",
    "                               coegene_count)\n",
    "\n",
    "  enrichment_found<-0\n",
    "  # Perform TRANSFAC enrichment separately for each eQTL\n",
    "  for(eqtl in coegene_count$snp_eqtlgene){\n",
    "    print(eqtl)\n",
    "\n",
    "    # Per-eQTL background kept in its own variable so that the cell-type wide\n",
    "    # background_genes is never overwritten by a restricted set\n",
    "    eqtl_background <- select_background(coeqtls, eqtl, background_genes)\n",
    "    print(length(eqtl_background))\n",
    "\n",
    "    res <- run_tf_enrichment(coeqtls_sign$second_gene[coeqtls_sign$snp_eqtlgene == eqtl],\n",
    "                             eqtl_background, cell_type, eqtl)\n",
    "\n",
    "    if(!is.null(res)){\n",
    "      # Count the eQTL and save its result dataframe if an enrichment was found\n",
    "      enrichment_found<-enrichment_found+1\n",
    "      enrichment<-rbind(enrichment,\n",
    "                        res)\n",
    "    }\n",
    "  }\n",
    "\n",
    "  enrichment_summary<-rbind(enrichment_summary,\n",
    "                            data.frame(cell_type,\n",
    "                                       n_eqtls_freq=nrow(coegene_count),\n",
    "                                       n_enrich=enrichment_found,\n",
    "                                       freq_enrich=enrichment_found/nrow(coegene_count)))\n",
    "\n",
    "  # For CD4T, additionally test the positive & negative RPS26 coeGenes separately.\n",
    "  # These direction-specific tests are appended to enrichment but are not counted\n",
    "  # in enrichment_summary, which is computed per eQTL above.\n",
    "  if(cell_type==\"CD4T\"){\n",
    "    eqtl<-\"rs1131017_RPS26\"\n",
    "    rps26_background <- select_background(coeqtls, eqtl, background_genes)\n",
    "    is_rps26 <- coeqtls_sign$snp_eqtlgene == eqtl\n",
    "\n",
    "    # MAF is not correctly flipped here: MetaPZ < 0 marks the positive coeGenes\n",
    "    # and MetaPZ > 0 the negative ones\n",
    "    direction_masks <- list(positive = coeqtls_sign$MetaPZ < 0,\n",
    "                            negative = coeqtls_sign$MetaPZ > 0)\n",
    "\n",
    "    for(direction in names(direction_masks)){\n",
    "      res <- run_tf_enrichment(coeqtls_sign$second_gene[is_rps26 & direction_masks[[direction]]],\n",
    "                               rps26_background, cell_type,\n",
    "                               paste0(eqtl,\"_\",direction))\n",
    "      # rbind() ignores a NULL result, so nothing is appended without an enrichment\n",
    "      enrichment<-rbind(enrichment,\n",
    "                        res)\n",
    "    }\n",
    "  }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "id": "2161ef04-9fdc-4e14-b318-089eb91db546",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Inspect result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "id": "0ae39aef-8d0c-4f59-821e-d1672811783e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 16</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>query</th><th scope=col>significant</th><th scope=col>p_value</th><th scope=col>term_size</th><th scope=col>query_size</th><th scope=col>intersection_size</th><th scope=col>precision</th><th scope=col>recall</th><th scope=col>term_id</th><th scope=col>source</th><th scope=col>term_name</th><th scope=col>effective_domain_size</th><th scope=col>source_order</th><th scope=col>parents</th><th scope=col>cell_type</th><th scope=col>snp_eGene</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;list&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>query_1</td><td>TRUE</td><td>0.049610826</td><td>2342</td><td> 27</td><td> 22</td><td>0.8148148</td><td>0.009393681</td><td>TF:M00665  </td><td>TF</td><td>Factor: Sp3; motif: ASMCTTGGGSRGGG                        </td><td> 5705</td><td>7882</td><td>TF:M00000</td><td>DC  </td><td>rs7935082_MS4A7         </td></tr>\n",
       "\t<tr><th scope=row>2</th><td>query_1</td><td>TRUE</td><td>0.049610826</td><td>2303</td><td> 27</td><td> 22</td><td>0.8148148</td><td>0.009552757</td><td>TF:M03582  </td><td>TF</td><td>Factor: TWIST; motif: CACCTGG                             </td><td> 5705</td><td>8844</td><td>TF:M00000</td><td>DC  </td><td>rs7935082_MS4A7         </td></tr>\n",
       "\t<tr><th scope=row>3</th><td>query_1</td><td>TRUE</td><td>0.003978389</td><td>3447</td><td>351</td><td>163</td><td>0.4643875</td><td>0.047287496</td><td>TF:M11438  </td><td>TF</td><td>Factor: SAP-1; motif: NTCGTAAATGCN                        </td><td>10167</td><td>1882</td><td>TF:M00000</td><td>CD4T</td><td>rs1131017_RPS26         </td></tr>\n",
       "\t<tr><th scope=row>4</th><td>query_1</td><td>TRUE</td><td>0.022537569</td><td>3025</td><td> 20</td><td> 16</td><td>0.8000000</td><td>0.005289256</td><td>TF:M08413  </td><td>TF</td><td>Factor: TEF-3:C/EBPdelta; motif: RGWATGYNRTTRCGYAAY       </td><td>10167</td><td>8434</td><td>TF:M00000</td><td>CD4T</td><td>rs7605824_SH3YL1        </td></tr>\n",
       "\t<tr><th scope=row>5</th><td>query_1</td><td>TRUE</td><td>0.002470867</td><td>3285</td><td>191</td><td> 95</td><td>0.4973822</td><td>0.028919330</td><td>TF:M10785  </td><td>TF</td><td>Factor: hoxa9; motif: RTCGTWANNN                          </td><td>10167</td><td>3774</td><td>TF:M00000</td><td>CD4T</td><td>rs1131017_RPS26_positive</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>query_1</td><td>TRUE</td><td>0.003339438</td><td>1184</td><td>191</td><td> 46</td><td>0.2408377</td><td>0.038851351</td><td>TF:M04696_1</td><td>TF</td><td>Factor: YY1; motif: GCCGCCATNTTGNNNNNGGNCN; match class: 1</td><td>10167</td><td>9013</td><td>TF:M04696</td><td>CD4T</td><td>rs1131017_RPS26_positive</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 16\n",
       "\\begin{tabular}{r|llllllllllllllll}\n",
       "  & query & significant & p\\_value & term\\_size & query\\_size & intersection\\_size & precision & recall & term\\_id & source & term\\_name & effective\\_domain\\_size & source\\_order & parents & cell\\_type & snp\\_eGene\\\\\n",
       "  & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <dbl> & <dbl> & <chr> & <chr> & <chr> & <int> & <int> & <list> & <chr> & <chr>\\\\\n",
       "\\hline\n",
       "\t1 & query\\_1 & TRUE & 0.049610826 & 2342 &  27 &  22 & 0.8148148 & 0.009393681 & TF:M00665   & TF & Factor: Sp3; motif: ASMCTTGGGSRGGG                         &  5705 & 7882 & TF:M00000 & DC   & rs7935082\\_MS4A7         \\\\\n",
       "\t2 & query\\_1 & TRUE & 0.049610826 & 2303 &  27 &  22 & 0.8148148 & 0.009552757 & TF:M03582   & TF & Factor: TWIST; motif: CACCTGG                              &  5705 & 8844 & TF:M00000 & DC   & rs7935082\\_MS4A7         \\\\\n",
       "\t3 & query\\_1 & TRUE & 0.003978389 & 3447 & 351 & 163 & 0.4643875 & 0.047287496 & TF:M11438   & TF & Factor: SAP-1; motif: NTCGTAAATGCN                         & 10167 & 1882 & TF:M00000 & CD4T & rs1131017\\_RPS26         \\\\\n",
       "\t4 & query\\_1 & TRUE & 0.022537569 & 3025 &  20 &  16 & 0.8000000 & 0.005289256 & TF:M08413   & TF & Factor: TEF-3:C/EBPdelta; motif: RGWATGYNRTTRCGYAAY        & 10167 & 8434 & TF:M00000 & CD4T & rs7605824\\_SH3YL1        \\\\\n",
       "\t5 & query\\_1 & TRUE & 0.002470867 & 3285 & 191 &  95 & 0.4973822 & 0.028919330 & TF:M10785   & TF & Factor: hoxa9; motif: RTCGTWANNN                           & 10167 & 3774 & TF:M00000 & CD4T & rs1131017\\_RPS26\\_positive\\\\\n",
       "\t6 & query\\_1 & TRUE & 0.003339438 & 1184 & 191 &  46 & 0.2408377 & 0.038851351 & TF:M04696\\_1 & TF & Factor: YY1; motif: GCCGCCATNTTGNNNNNGGNCN; match class: 1 & 10167 & 9013 & TF:M04696 & CD4T & rs1131017\\_RPS26\\_positive\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 16\n",
       "\n",
       "| <!--/--> | query &lt;chr&gt; | significant &lt;lgl&gt; | p_value &lt;dbl&gt; | term_size &lt;int&gt; | query_size &lt;int&gt; | intersection_size &lt;int&gt; | precision &lt;dbl&gt; | recall &lt;dbl&gt; | term_id &lt;chr&gt; | source &lt;chr&gt; | term_name &lt;chr&gt; | effective_domain_size &lt;int&gt; | source_order &lt;int&gt; | parents &lt;list&gt; | cell_type &lt;chr&gt; | snp_eGene &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | query_1 | TRUE | 0.049610826 | 2342 |  27 |  22 | 0.8148148 | 0.009393681 | TF:M00665   | TF | Factor: Sp3; motif: ASMCTTGGGSRGGG                         |  5705 | 7882 | TF:M00000 | DC   | rs7935082_MS4A7          |\n",
       "| 2 | query_1 | TRUE | 0.049610826 | 2303 |  27 |  22 | 0.8148148 | 0.009552757 | TF:M03582   | TF | Factor: TWIST; motif: CACCTGG                              |  5705 | 8844 | TF:M00000 | DC   | rs7935082_MS4A7          |\n",
       "| 3 | query_1 | TRUE | 0.003978389 | 3447 | 351 | 163 | 0.4643875 | 0.047287496 | TF:M11438   | TF | Factor: SAP-1; motif: NTCGTAAATGCN                         | 10167 | 1882 | TF:M00000 | CD4T | rs1131017_RPS26          |\n",
       "| 4 | query_1 | TRUE | 0.022537569 | 3025 |  20 |  16 | 0.8000000 | 0.005289256 | TF:M08413   | TF | Factor: TEF-3:C/EBPdelta; motif: RGWATGYNRTTRCGYAAY        | 10167 | 8434 | TF:M00000 | CD4T | rs7605824_SH3YL1         |\n",
       "| 5 | query_1 | TRUE | 0.002470867 | 3285 | 191 |  95 | 0.4973822 | 0.028919330 | TF:M10785   | TF | Factor: hoxa9; motif: RTCGTWANNN                           | 10167 | 3774 | TF:M00000 | CD4T | rs1131017_RPS26_positive |\n",
       "| 6 | query_1 | TRUE | 0.003339438 | 1184 | 191 |  46 | 0.2408377 | 0.038851351 | TF:M04696_1 | TF | Factor: YY1; motif: GCCGCCATNTTGNNNNNGGNCN; match class: 1 | 10167 | 9013 | TF:M04696 | CD4T | rs1131017_RPS26_positive |\n",
       "\n"
      ],
      "text/plain": [
       "  query   significant p_value     term_size query_size intersection_size\n",
       "1 query_1 TRUE        0.049610826 2342       27         22              \n",
       "2 query_1 TRUE        0.049610826 2303       27         22              \n",
       "3 query_1 TRUE        0.003978389 3447      351        163              \n",
       "4 query_1 TRUE        0.022537569 3025       20         16              \n",
       "5 query_1 TRUE        0.002470867 3285      191         95              \n",
       "6 query_1 TRUE        0.003339438 1184      191         46              \n",
       "  precision recall      term_id     source\n",
       "1 0.8148148 0.009393681 TF:M00665   TF    \n",
       "2 0.8148148 0.009552757 TF:M03582   TF    \n",
       "3 0.4643875 0.047287496 TF:M11438   TF    \n",
       "4 0.8000000 0.005289256 TF:M08413   TF    \n",
       "5 0.4973822 0.028919330 TF:M10785   TF    \n",
       "6 0.2408377 0.038851351 TF:M04696_1 TF    \n",
       "  term_name                                                 \n",
       "1 Factor: Sp3; motif: ASMCTTGGGSRGGG                        \n",
       "2 Factor: TWIST; motif: CACCTGG                             \n",
       "3 Factor: SAP-1; motif: NTCGTAAATGCN                        \n",
       "4 Factor: TEF-3:C/EBPdelta; motif: RGWATGYNRTTRCGYAAY       \n",
       "5 Factor: hoxa9; motif: RTCGTWANNN                          \n",
       "6 Factor: YY1; motif: GCCGCCATNTTGNNNNNGGNCN; match class: 1\n",
       "  effective_domain_size source_order parents   cell_type\n",
       "1  5705                 7882         TF:M00000 DC       \n",
       "2  5705                 8844         TF:M00000 DC       \n",
       "3 10167                 1882         TF:M00000 CD4T     \n",
       "4 10167                 8434         TF:M00000 CD4T     \n",
       "5 10167                 3774         TF:M00000 CD4T     \n",
       "6 10167                 9013         TF:M04696 CD4T     \n",
       "  snp_eGene               \n",
       "1 rs7935082_MS4A7         \n",
       "2 rs7935082_MS4A7         \n",
       "3 rs1131017_RPS26         \n",
       "4 rs7605824_SH3YL1        \n",
       "5 rs1131017_RPS26_positive\n",
       "6 rs1131017_RPS26_positive"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview the first rows of the TRANSFAC enrichment results collected across cell types\n",
    "head(enrichment, n = 6L)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "id": "724fe97f-5620-4d55-9d2f-154c00f7bcd8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 6 × 4</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>cell_type</th><th scope=col>n_eqtls_freq</th><th scope=col>n_enrich</th><th scope=col>freq_enrich</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>DC      </td><td>2</td><td>1</td><td>0.5000000</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>CD4T    </td><td>8</td><td>2</td><td>0.2500000</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>CD8T    </td><td>6</td><td>0</td><td>0.0000000</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>monocyte</td><td>9</td><td>1</td><td>0.1111111</td></tr>\n",
       "\t<tr><th scope=row>5</th><td>NK      </td><td>3</td><td>2</td><td>0.6666667</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>B       </td><td>1</td><td>1</td><td>1.0000000</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 6 × 4\n",
       "\\begin{tabular}{r|llll}\n",
       "  & cell\\_type & n\\_eqtls\\_freq & n\\_enrich & freq\\_enrich\\\\\n",
       "  & <chr> & <int> & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t1 & DC       & 2 & 1 & 0.5000000\\\\\n",
       "\t2 & CD4T     & 8 & 2 & 0.2500000\\\\\n",
       "\t3 & CD8T     & 6 & 0 & 0.0000000\\\\\n",
       "\t4 & monocyte & 9 & 1 & 0.1111111\\\\\n",
       "\t5 & NK       & 3 & 2 & 0.6666667\\\\\n",
       "\t6 & B        & 1 & 1 & 1.0000000\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 6 × 4\n",
       "\n",
       "| <!--/--> | cell_type &lt;chr&gt; | n_eqtls_freq &lt;int&gt; | n_enrich &lt;dbl&gt; | freq_enrich &lt;dbl&gt; |\n",
       "|---|---|---|---|---|\n",
       "| 1 | DC       | 2 | 1 | 0.5000000 |\n",
       "| 2 | CD4T     | 8 | 2 | 0.2500000 |\n",
       "| 3 | CD8T     | 6 | 0 | 0.0000000 |\n",
       "| 4 | monocyte | 9 | 1 | 0.1111111 |\n",
       "| 5 | NK       | 3 | 2 | 0.6666667 |\n",
       "| 6 | B        | 1 | 1 | 1.0000000 |\n",
       "\n"
      ],
      "text/plain": [
       "  cell_type n_eqtls_freq n_enrich freq_enrich\n",
       "1 DC        2            1        0.5000000  \n",
       "2 CD4T      8            2        0.2500000  \n",
       "3 CD8T      6            0        0.0000000  \n",
       "4 monocyte  9            1        0.1111111  \n",
       "5 NK        3            2        0.6666667  \n",
       "6 B         1            1        1.0000000  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(enrichment_summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "id": "90e7aea5-2e69-4412-89b7-cbd605fb836d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A tibble: 6 × 3</caption>\n",
       "<thead>\n",
       "\t<tr><th scope=col>snp_eqtlgene</th><th scope=col>count_coeGenes</th><th scope=col>cell_type</th></tr>\n",
       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><td>rs7935082_MS4A7     </td><td> 30</td><td>DC  </td></tr>\n",
       "\t<tr><td>rs9271520_HLA-DQA2  </td><td> 13</td><td>DC  </td></tr>\n",
       "\t<tr><td>rs111454690_HLA-DRB5</td><td> 19</td><td>CD4T</td></tr>\n",
       "\t<tr><td>rs1131017_RPS26     </td><td>372</td><td>CD4T</td></tr>\n",
       "\t<tr><td>rs2741159_KRT1      </td><td>  8</td><td>CD4T</td></tr>\n",
       "\t<tr><td>rs4147638_SMDT1     </td><td> 19</td><td>CD4T</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A tibble: 6 × 3\n",
       "\\begin{tabular}{lll}\n",
       " snp\\_eqtlgene & count\\_coeGenes & cell\\_type\\\\\n",
       " <chr> & <int> & <chr>\\\\\n",
       "\\hline\n",
       "\t rs7935082\\_MS4A7      &  30 & DC  \\\\\n",
       "\t rs9271520\\_HLA-DQA2   &  13 & DC  \\\\\n",
       "\t rs111454690\\_HLA-DRB5 &  19 & CD4T\\\\\n",
       "\t rs1131017\\_RPS26      & 372 & CD4T\\\\\n",
       "\t rs2741159\\_KRT1       &   8 & CD4T\\\\\n",
       "\t rs4147638\\_SMDT1      &  19 & CD4T\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A tibble: 6 × 3\n",
       "\n",
       "| snp_eqtlgene &lt;chr&gt; | count_coeGenes &lt;int&gt; | cell_type &lt;chr&gt; |\n",
       "|---|---|---|\n",
       "| rs7935082_MS4A7      |  30 | DC   |\n",
       "| rs9271520_HLA-DQA2   |  13 | DC   |\n",
       "| rs111454690_HLA-DRB5 |  19 | CD4T |\n",
       "| rs1131017_RPS26      | 372 | CD4T |\n",
       "| rs2741159_KRT1       |   8 | CD4T |\n",
       "| rs4147638_SMDT1      |  19 | CD4T |\n",
       "\n"
      ],
      "text/plain": [
       "  snp_eqtlgene         count_coeGenes cell_type\n",
       "1 rs7935082_MS4A7       30            DC       \n",
       "2 rs9271520_HLA-DQA2    13            DC       \n",
       "3 rs111454690_HLA-DRB5  19            CD4T     \n",
       "4 rs1131017_RPS26      372            CD4T     \n",
       "5 rs2741159_KRT1         8            CD4T     \n",
       "6 rs4147638_SMDT1       19            CD4T     "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(coegenes_counts_total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "id": "a7788afe-58c2-41bf-bd53-bec13a912a46",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "0.0496108257480342"
      ],
      "text/latex": [
       "0.0496108257480342"
      ],
      "text/markdown": [
       "0.0496108257480342"
      ],
      "text/plain": [
       "[1] 0.04961083"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "max(enrichment$p_value)  # set to same level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 226,
   "id": "ba254471-a07d-4b06-9280-970d02582110",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Evaluate amount of enrichments found per cell-type with set p-value threshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "id": "6729f307-8da0-4700-9b99-d4cc8f9b2fce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A grouped_df: 5 × 2</caption>\n",
       "<thead>\n",
       "\t<tr><th scope=col>cell_type</th><th scope=col>n</th></tr>\n",
       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><td>B       </td><td> 3</td></tr>\n",
       "\t<tr><td>CD4T    </td><td>54</td></tr>\n",
       "\t<tr><td>DC      </td><td> 2</td></tr>\n",
       "\t<tr><td>monocyte</td><td>40</td></tr>\n",
       "\t<tr><td>NK      </td><td>21</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A grouped\\_df: 5 × 2\n",
       "\\begin{tabular}{ll}\n",
       " cell\\_type & n\\\\\n",
       " <chr> & <int>\\\\\n",
       "\\hline\n",
       "\t B        &  3\\\\\n",
       "\t CD4T     & 54\\\\\n",
       "\t DC       &  2\\\\\n",
       "\t monocyte & 40\\\\\n",
       "\t NK       & 21\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A grouped_df: 5 × 2\n",
       "\n",
       "| cell_type &lt;chr&gt; | n &lt;int&gt; |\n",
       "|---|---|\n",
       "| B        |  3 |\n",
       "| CD4T     | 54 |\n",
       "| DC       |  2 |\n",
       "| monocyte | 40 |\n",
       "| NK       | 21 |\n",
       "\n"
      ],
      "text/plain": [
       "  cell_type n \n",
       "1 B          3\n",
       "2 CD4T      54\n",
       "3 DC         2\n",
       "4 monocyte  40\n",
       "5 NK        21"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "enrichment %>% group_by(cell_type) %>% count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "id": "7d3dbfdc-2862-4ead-be5a-7645b8a170ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "120"
      ],
      "text/latex": [
       "120"
      ],
      "text/markdown": [
       "120"
      ],
      "text/plain": [
       "[1] 120"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nrow(enrichment)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "id": "8a481a49-f784-45a6-b640-e33cf122469f",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Save the enrichment result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "id": "f5e929c8-3fd4-4023-8483-3ef1bb3335d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$parents = NULL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "id": "3db92875-75a3-436f-b26c-e116bb15e0ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "write.csv(enrichment, paste0(path, \"transfac_results/TRANSFAC_Enrichments.csv\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1c57807-ebfa-4ef6-ad86-4022bcee0fe0",
   "metadata": {},
   "source": [
    "# Compare to previous enrichment results with Remap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "id": "08a4064d-3059-422a-9406-0cac4c75830b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 2 × 13</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>Cell.type</th><th scope=col>eQTL..SNP.eGene.</th><th scope=col>TF</th><th scope=col>TF.is.a.co.eGene.</th><th scope=col>enrichment.p.value</th><th scope=col>X..TF.overlap...co.eGene</th><th scope=col>X..TF.overlap...background</th><th scope=col>X..no.TF.overlap...co.eGene</th><th scope=col>X..background.gene...not.co.eGene</th><th scope=col>enrichment.fdr</th><th scope=col>eQTL.SNP</th><th scope=col>SNP.overlaps.TF.</th><th scope=col>Names.of.overlapping.SNPs</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>CDK8   </td><td>FALSE</td><td>9.630369e-06</td><td>14</td><td>5</td><td>2778</td><td> 8515</td><td>1.640373e-03</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "\t<tr><th scope=row>2</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>SNRNP70</td><td>FALSE</td><td>1.209254e-09</td><td>11</td><td>8</td><td> 649</td><td>10644</td><td>6.179288e-07</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 2 × 13\n",
       "\\begin{tabular}{r|lllllllllllll}\n",
       "  & Cell.type & eQTL..SNP.eGene. & TF & TF.is.a.co.eGene. & enrichment.p.value & X..TF.overlap...co.eGene & X..TF.overlap...background & X..no.TF.overlap...co.eGene & X..background.gene...not.co.eGene & enrichment.fdr & eQTL.SNP & SNP.overlaps.TF. & Names.of.overlapping.SNPs\\\\\n",
       "  & <chr> & <chr> & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <int> & <dbl> & <chr> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t1 & CD4T & rs111454690\\_HLA-DRB5 & CDK8    & FALSE & 9.630369e-06 & 14 & 5 & 2778 &  8515 & 1.640373e-03 & rs111454690 & FALSE & \\\\\n",
       "\t2 & CD4T & rs111454690\\_HLA-DRB5 & SNRNP70 & FALSE & 1.209254e-09 & 11 & 8 &  649 & 10644 & 6.179288e-07 & rs111454690 & FALSE & \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 2 × 13\n",
       "\n",
       "| <!--/--> | Cell.type &lt;chr&gt; | eQTL..SNP.eGene. &lt;chr&gt; | TF &lt;chr&gt; | TF.is.a.co.eGene. &lt;lgl&gt; | enrichment.p.value &lt;dbl&gt; | X..TF.overlap...co.eGene &lt;int&gt; | X..TF.overlap...background &lt;int&gt; | X..no.TF.overlap...co.eGene &lt;int&gt; | X..background.gene...not.co.eGene &lt;int&gt; | enrichment.fdr &lt;dbl&gt; | eQTL.SNP &lt;chr&gt; | SNP.overlaps.TF. &lt;lgl&gt; | Names.of.overlapping.SNPs &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | CD4T | rs111454690_HLA-DRB5 | CDK8    | FALSE | 9.630369e-06 | 14 | 5 | 2778 |  8515 | 1.640373e-03 | rs111454690 | FALSE | <!----> |\n",
       "| 2 | CD4T | rs111454690_HLA-DRB5 | SNRNP70 | FALSE | 1.209254e-09 | 11 | 8 |  649 | 10644 | 6.179288e-07 | rs111454690 | FALSE | <!----> |\n",
       "\n"
      ],
      "text/plain": [
       "  Cell.type eQTL..SNP.eGene.     TF      TF.is.a.co.eGene. enrichment.p.value\n",
       "1 CD4T      rs111454690_HLA-DRB5 CDK8    FALSE             9.630369e-06      \n",
       "2 CD4T      rs111454690_HLA-DRB5 SNRNP70 FALSE             1.209254e-09      \n",
       "  X..TF.overlap...co.eGene X..TF.overlap...background\n",
       "1 14                       5                         \n",
       "2 11                       8                         \n",
       "  X..no.TF.overlap...co.eGene X..background.gene...not.co.eGene enrichment.fdr\n",
       "1 2778                         8515                             1.640373e-03  \n",
       "2  649                        10644                             6.179288e-07  \n",
       "  eQTL.SNP    SNP.overlaps.TF. Names.of.overlapping.SNPs\n",
       "1 rs111454690 FALSE                                     \n",
       "2 rs111454690 FALSE                                     "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(old_enrichments,2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "238a1fe5-72cf-4dd4-96b2-c25e18fa41e5",
   "metadata": {},
   "source": [
    "## Compare amount of enrichments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "id": "8058ccda-a072-443d-9b29-66fef671cd7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "amount_enrichments_old = old_enrichments %>% group_by(Cell.type, eQTL..SNP.eGene.) %>% count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "id": "baac2391-4638-4e42-923e-912226128e43",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A grouped_df: 6 × 3</caption>\n",
       "<thead>\n",
       "\t<tr><th scope=col>Cell.type</th><th scope=col>eQTL..SNP.eGene.</th><th scope=col>n</th></tr>\n",
       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><td>B   </td><td>rs1131017_RPS26         </td><td> 82</td></tr>\n",
       "\t<tr><td>CD4T</td><td>rs111454690_HLA-DRB5    </td><td> 14</td></tr>\n",
       "\t<tr><td>CD4T</td><td>rs1131017_RPS26         </td><td>134</td></tr>\n",
       "\t<tr><td>CD4T</td><td>rs1131017_RPS26_negative</td><td> 93</td></tr>\n",
       "\t<tr><td>CD4T</td><td>rs1131017_RPS26_positive</td><td>125</td></tr>\n",
       "\t<tr><td>CD4T</td><td>rs4147638_SMDT1         </td><td> 14</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A grouped\\_df: 6 × 3\n",
       "\\begin{tabular}{lll}\n",
       " Cell.type & eQTL..SNP.eGene. & n\\\\\n",
       " <chr> & <chr> & <int>\\\\\n",
       "\\hline\n",
       "\t B    & rs1131017\\_RPS26          &  82\\\\\n",
       "\t CD4T & rs111454690\\_HLA-DRB5     &  14\\\\\n",
       "\t CD4T & rs1131017\\_RPS26          & 134\\\\\n",
       "\t CD4T & rs1131017\\_RPS26\\_negative &  93\\\\\n",
       "\t CD4T & rs1131017\\_RPS26\\_positive & 125\\\\\n",
       "\t CD4T & rs4147638\\_SMDT1          &  14\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A grouped_df: 6 × 3\n",
       "\n",
       "| Cell.type &lt;chr&gt; | eQTL..SNP.eGene. &lt;chr&gt; | n &lt;int&gt; |\n",
       "|---|---|---|\n",
       "| B    | rs1131017_RPS26          |  82 |\n",
       "| CD4T | rs111454690_HLA-DRB5     |  14 |\n",
       "| CD4T | rs1131017_RPS26          | 134 |\n",
       "| CD4T | rs1131017_RPS26_negative |  93 |\n",
       "| CD4T | rs1131017_RPS26_positive | 125 |\n",
       "| CD4T | rs4147638_SMDT1          |  14 |\n",
       "\n"
      ],
      "text/plain": [
       "  Cell.type eQTL..SNP.eGene.         n  \n",
       "1 B         rs1131017_RPS26           82\n",
       "2 CD4T      rs111454690_HLA-DRB5      14\n",
       "3 CD4T      rs1131017_RPS26          134\n",
       "4 CD4T      rs1131017_RPS26_negative  93\n",
       "5 CD4T      rs1131017_RPS26_positive 125\n",
       "6 CD4T      rs4147638_SMDT1           14"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(amount_enrichments_old)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 236,
   "id": "6f219e98-8ec6-45fa-b145-7b8086806dd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "colnames(amount_enrichments_old) = c('cell_type', 'snp_eGene', 'ReMap_amount')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "id": "9bcc8ddb-a123-4fe6-b1bb-8e6a5d451519",
   "metadata": {},
   "outputs": [],
   "source": [
    "transfac_enrichments = enrichment %>% group_by(cell_type, snp_eGene) %>% count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "id": "ce779ce7-e46a-466f-be5b-1cab27eadd2c",
   "metadata": {},
   "outputs": [],
   "source": [
    "colnames(transfac_enrichments)= c('cell_type', 'snp_eGene', 'TRANSFAC_amount')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "id": "06ae3596-604f-4ebc-b6e9-66150ffb9559",
   "metadata": {},
   "outputs": [],
   "source": [
    "overview = merge(amount_enrichments_old, transfac_enrichments, all.x = TRUE, all.y = TRUE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "id": "7b372d6d-1e65-4750-9414-6cd3b5294a05",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Result of comparisoon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 245,
   "id": "6754a2a0-cdef-4126-9042-5820a8f65f62",
   "metadata": {},
   "outputs": [],
   "source": [
    "overview[is.na(overview)]= 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 246,
   "id": "6fc4ba13-519b-4ccd-bf51-967295a82d06",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 19 × 4</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>cell_type</th><th scope=col>snp_eGene</th><th scope=col>ReMap_amount</th><th scope=col>TRANSFAC_amount</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>5</th><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>125</td><td>51</td></tr>\n",
       "\t<tr><th scope=row>16</th><td>monocyte</td><td>rs1131017_RPS26         </td><td>145</td><td>40</td></tr>\n",
       "\t<tr><th scope=row>18</th><td>NK      </td><td>rs1131017_RPS26         </td><td>132</td><td>20</td></tr>\n",
       "\t<tr><th scope=row>1</th><td>B       </td><td>rs1131017_RPS26         </td><td> 82</td><td> 3</td></tr>\n",
       "\t<tr><th scope=row>14</th><td>DC      </td><td>rs7935082_MS4A7         </td><td>  0</td><td> 2</td></tr>\n",
       "\t<tr><th scope=row>3</th><td>CD4T    </td><td>rs1131017_RPS26         </td><td>134</td><td> 1</td></tr>\n",
       "\t<tr><th scope=row>4</th><td>CD4T    </td><td>rs1131017_RPS26_negative</td><td> 93</td><td> 1</td></tr>\n",
       "\t<tr><th scope=row>7</th><td>CD4T    </td><td>rs7605824_SH3YL1        </td><td> 58</td><td> 1</td></tr>\n",
       "\t<tr><th scope=row>19</th><td>NK      </td><td>rs12151742_GNLY         </td><td>  0</td><td> 1</td></tr>\n",
       "\t<tr><th scope=row>2</th><td>CD4T    </td><td>rs111454690_HLA-DRB5    </td><td> 14</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>6</th><td>CD4T    </td><td>rs4147638_SMDT1         </td><td> 14</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>8</th><td>CD4T    </td><td>rs7632486_CMTM8         </td><td>  4</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>9</th><td>CD4T    </td><td>rs9271520_HLA-DQA2      </td><td>  5</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>10</th><td>CD8T    </td><td>rs1131017_RPS26         </td><td> 62</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>11</th><td>CD8T    </td><td>rs4147638_SMDT1         </td><td> 78</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>12</th><td>CD8T    </td><td>rs6708265_PASK          </td><td>  3</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>13</th><td>CD8T    </td><td>rs7605824_SH3YL1        </td><td>  9</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>15</th><td>monocyte</td><td>rs111454690_HLA-DRB5    </td><td>  1</td><td> 0</td></tr>\n",
       "\t<tr><th scope=row>17</th><td>monocyte</td><td>rs9271520_HLA-DQA2      </td><td>  4</td><td> 0</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 19 × 4\n",
       "\\begin{tabular}{r|llll}\n",
       "  & cell\\_type & snp\\_eGene & ReMap\\_amount & TRANSFAC\\_amount\\\\\n",
       "  & <chr> & <chr> & <dbl> & <dbl>\\\\\n",
       "\\hline\n",
       "\t5 & CD4T     & rs1131017\\_RPS26\\_positive & 125 & 51\\\\\n",
       "\t16 & monocyte & rs1131017\\_RPS26          & 145 & 40\\\\\n",
       "\t18 & NK       & rs1131017\\_RPS26          & 132 & 20\\\\\n",
       "\t1 & B        & rs1131017\\_RPS26          &  82 &  3\\\\\n",
       "\t14 & DC       & rs7935082\\_MS4A7          &   0 &  2\\\\\n",
       "\t3 & CD4T     & rs1131017\\_RPS26          & 134 &  1\\\\\n",
       "\t4 & CD4T     & rs1131017\\_RPS26\\_negative &  93 &  1\\\\\n",
       "\t7 & CD4T     & rs7605824\\_SH3YL1         &  58 &  1\\\\\n",
       "\t19 & NK       & rs12151742\\_GNLY          &   0 &  1\\\\\n",
       "\t2 & CD4T     & rs111454690\\_HLA-DRB5     &  14 &  0\\\\\n",
       "\t6 & CD4T     & rs4147638\\_SMDT1          &  14 &  0\\\\\n",
       "\t8 & CD4T     & rs7632486\\_CMTM8          &   4 &  0\\\\\n",
       "\t9 & CD4T     & rs9271520\\_HLA-DQA2       &   5 &  0\\\\\n",
       "\t10 & CD8T     & rs1131017\\_RPS26          &  62 &  0\\\\\n",
       "\t11 & CD8T     & rs4147638\\_SMDT1          &  78 &  0\\\\\n",
       "\t12 & CD8T     & rs6708265\\_PASK           &   3 &  0\\\\\n",
       "\t13 & CD8T     & rs7605824\\_SH3YL1         &   9 &  0\\\\\n",
       "\t15 & monocyte & rs111454690\\_HLA-DRB5     &   1 &  0\\\\\n",
       "\t17 & monocyte & rs9271520\\_HLA-DQA2       &   4 &  0\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 19 × 4\n",
       "\n",
       "| <!--/--> | cell_type &lt;chr&gt; | snp_eGene &lt;chr&gt; | ReMap_amount &lt;dbl&gt; | TRANSFAC_amount &lt;dbl&gt; |\n",
       "|---|---|---|---|---|\n",
       "| 5 | CD4T     | rs1131017_RPS26_positive | 125 | 51 |\n",
       "| 16 | monocyte | rs1131017_RPS26          | 145 | 40 |\n",
       "| 18 | NK       | rs1131017_RPS26          | 132 | 20 |\n",
       "| 1 | B        | rs1131017_RPS26          |  82 |  3 |\n",
       "| 14 | DC       | rs7935082_MS4A7          |   0 |  2 |\n",
       "| 3 | CD4T     | rs1131017_RPS26          | 134 |  1 |\n",
       "| 4 | CD4T     | rs1131017_RPS26_negative |  93 |  1 |\n",
       "| 7 | CD4T     | rs7605824_SH3YL1         |  58 |  1 |\n",
       "| 19 | NK       | rs12151742_GNLY          |   0 |  1 |\n",
       "| 2 | CD4T     | rs111454690_HLA-DRB5     |  14 |  0 |\n",
       "| 6 | CD4T     | rs4147638_SMDT1          |  14 |  0 |\n",
       "| 8 | CD4T     | rs7632486_CMTM8          |   4 |  0 |\n",
       "| 9 | CD4T     | rs9271520_HLA-DQA2       |   5 |  0 |\n",
       "| 10 | CD8T     | rs1131017_RPS26          |  62 |  0 |\n",
       "| 11 | CD8T     | rs4147638_SMDT1          |  78 |  0 |\n",
       "| 12 | CD8T     | rs6708265_PASK           |   3 |  0 |\n",
       "| 13 | CD8T     | rs7605824_SH3YL1         |   9 |  0 |\n",
       "| 15 | monocyte | rs111454690_HLA-DRB5     |   1 |  0 |\n",
       "| 17 | monocyte | rs9271520_HLA-DQA2       |   4 |  0 |\n",
       "\n"
      ],
      "text/plain": [
       "   cell_type snp_eGene                ReMap_amount TRANSFAC_amount\n",
       "5  CD4T      rs1131017_RPS26_positive 125          51             \n",
       "16 monocyte  rs1131017_RPS26          145          40             \n",
       "18 NK        rs1131017_RPS26          132          20             \n",
       "1  B         rs1131017_RPS26           82           3             \n",
       "14 DC        rs7935082_MS4A7            0           2             \n",
       "3  CD4T      rs1131017_RPS26          134           1             \n",
       "4  CD4T      rs1131017_RPS26_negative  93           1             \n",
       "7  CD4T      rs7605824_SH3YL1          58           1             \n",
       "19 NK        rs12151742_GNLY            0           1             \n",
       "2  CD4T      rs111454690_HLA-DRB5      14           0             \n",
       "6  CD4T      rs4147638_SMDT1           14           0             \n",
       "8  CD4T      rs7632486_CMTM8            4           0             \n",
       "9  CD4T      rs9271520_HLA-DQA2         5           0             \n",
       "10 CD8T      rs1131017_RPS26           62           0             \n",
       "11 CD8T      rs4147638_SMDT1           78           0             \n",
       "12 CD8T      rs6708265_PASK             3           0             \n",
       "13 CD8T      rs7605824_SH3YL1           9           0             \n",
       "15 monocyte  rs111454690_HLA-DRB5       1           0             \n",
       "17 monocyte  rs9271520_HLA-DQA2         4           0             "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "overview[order(overview$TRANSFAC_amount, decreasing = TRUE),]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 247,
   "id": "c04f7c9b-1e74-401d-bcf4-569919447df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "write.csv(overview, paste0(path, \"transfac_results/TRANSFAC_ReMap_comparison.csv\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80ddea9d-1cc5-4ce3-b595-e15d06593b83",
   "metadata": {},
   "source": [
    "## Compare the TFs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "8dce9b80-35b0-4f3e-9023-9912c1348657",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paper: six TFs—RBM39, TCF7, LEF1, KLF6, CD74 and MAF—whose binding sites were enriched in the promoter region of the rs1131017–RPS26\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 280,
   "id": "fce20036-c368-467e-9f62-07dd7cf63b6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_extract(enrichment$term_name, '.*;')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "id": "8ffb3678-a8e1-4648-bcfc-d9fc3d1b30c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf, 'Factor: ', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "id": "9b91aee3-698d-488c-8e7f-d68cc00bdb17",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf, ';', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "id": "ea937e43-2fdf-47c3-8f1a-e9919889e5f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf , 'motif.*', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "id": "0855320a-54d6-422c-9991-d2d9329c3fb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf , '-', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "id": "9f0f674c-8690-4e2c-84e2-949048f3e891",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = toupper(enrichment$tf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "id": "891df001-7839-41b9-96f5-6b2521e3b6eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf , ' ', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "id": "2873020c-ae31-4cfb-bb8b-953da1a03c61",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrichment$tf = str_replace(enrichment$tf, 'CETS-1', 'ETS1')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'C/EBPBETA|C/EBPBETA|C/EBPbeta|C/EBPBETA|GCMA:CEBPB', 'CEBPB')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'C/EBPDELTA|C/EBPDELTA|TEF3:CEBPD', 'CEBPD')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'C/EBPGAMMA', 'CEBPG')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'ELK1:HOXB13', 'ELK1')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'GTF2IRD1ISOFORM2', 'GTF2I')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'MEIS1:ELF1', 'ELF1')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'PU.1', 'SPI1')\n",
    "enrichment$tf = str_replace(enrichment$tf, 'TEF3:ERG', 'ERG')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "id": "f2c5345b-0fdd-40fb-a982-96794b5f6440",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 2 × 16</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>query</th><th scope=col>significant</th><th scope=col>p_value</th><th scope=col>term_size</th><th scope=col>query_size</th><th scope=col>intersection_size</th><th scope=col>precision</th><th scope=col>recall</th><th scope=col>term_id</th><th scope=col>source</th><th scope=col>term_name</th><th scope=col>effective_domain_size</th><th scope=col>source_order</th><th scope=col>cell_type</th><th scope=col>snp_eGene</th><th scope=col>tf</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>query_1</td><td>TRUE</td><td>0.04961083</td><td>2342</td><td>27</td><td>22</td><td>0.8148148</td><td>0.009393681</td><td>TF:M00665</td><td>TF</td><td>Factor: Sp3; motif: ASMCTTGGGSRGGG</td><td>5705</td><td>7882</td><td>DC</td><td>rs7935082_MS4A7</td><td>SP3  </td></tr>\n",
       "\t<tr><th scope=row>2</th><td>query_1</td><td>TRUE</td><td>0.04961083</td><td>2303</td><td>27</td><td>22</td><td>0.8148148</td><td>0.009552757</td><td>TF:M03582</td><td>TF</td><td>Factor: TWIST; motif: CACCTGG     </td><td>5705</td><td>8844</td><td>DC</td><td>rs7935082_MS4A7</td><td>TWIST</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 2 × 16\n",
       "\\begin{tabular}{r|llllllllllllllll}\n",
       "  & query & significant & p\\_value & term\\_size & query\\_size & intersection\\_size & precision & recall & term\\_id & source & term\\_name & effective\\_domain\\_size & source\\_order & cell\\_type & snp\\_eGene & tf\\\\\n",
       "  & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <dbl> & <dbl> & <chr> & <chr> & <chr> & <int> & <int> & <chr> & <chr> & <chr>\\\\\n",
       "\\hline\n",
       "\t1 & query\\_1 & TRUE & 0.04961083 & 2342 & 27 & 22 & 0.8148148 & 0.009393681 & TF:M00665 & TF & Factor: Sp3; motif: ASMCTTGGGSRGGG & 5705 & 7882 & DC & rs7935082\\_MS4A7 & SP3  \\\\\n",
       "\t2 & query\\_1 & TRUE & 0.04961083 & 2303 & 27 & 22 & 0.8148148 & 0.009552757 & TF:M03582 & TF & Factor: TWIST; motif: CACCTGG      & 5705 & 8844 & DC & rs7935082\\_MS4A7 & TWIST\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 2 × 16\n",
       "\n",
       "| <!--/--> | query &lt;chr&gt; | significant &lt;lgl&gt; | p_value &lt;dbl&gt; | term_size &lt;int&gt; | query_size &lt;int&gt; | intersection_size &lt;int&gt; | precision &lt;dbl&gt; | recall &lt;dbl&gt; | term_id &lt;chr&gt; | source &lt;chr&gt; | term_name &lt;chr&gt; | effective_domain_size &lt;int&gt; | source_order &lt;int&gt; | cell_type &lt;chr&gt; | snp_eGene &lt;chr&gt; | tf &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | query_1 | TRUE | 0.04961083 | 2342 | 27 | 22 | 0.8148148 | 0.009393681 | TF:M00665 | TF | Factor: Sp3; motif: ASMCTTGGGSRGGG | 5705 | 7882 | DC | rs7935082_MS4A7 | SP3   |\n",
       "| 2 | query_1 | TRUE | 0.04961083 | 2303 | 27 | 22 | 0.8148148 | 0.009552757 | TF:M03582 | TF | Factor: TWIST; motif: CACCTGG      | 5705 | 8844 | DC | rs7935082_MS4A7 | TWIST |\n",
       "\n"
      ],
      "text/plain": [
       "  query   significant p_value    term_size query_size intersection_size\n",
       "1 query_1 TRUE        0.04961083 2342      27         22               \n",
       "2 query_1 TRUE        0.04961083 2303      27         22               \n",
       "  precision recall      term_id   source term_name                         \n",
       "1 0.8148148 0.009393681 TF:M00665 TF     Factor: Sp3; motif: ASMCTTGGGSRGGG\n",
       "2 0.8148148 0.009552757 TF:M03582 TF     Factor: TWIST; motif: CACCTGG     \n",
       "  effective_domain_size source_order cell_type snp_eGene       tf   \n",
       "1 5705                  7882         DC        rs7935082_MS4A7 SP3  \n",
       "2 5705                  8844         DC        rs7935082_MS4A7 TWIST"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview the first two rows of the TRANSFAC enrichment table\n",
    "head(x = enrichment, n = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "id": "6ff9d9b7-e3b8-46d7-93ae-8b2f765f381a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prefix every column with 'TRANSFAC_' so the origin of each column stays clear after merging.\n",
    "# Any existing 'TRANSFAC_' prefix is stripped first, so re-running this cell does not stack prefixes.\n",
    "colnames(enrichment) = paste0('TRANSFAC_', sub('^TRANSFAC_', '', colnames(enrichment)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "id": "b940e752-4002-4c52-b066-bb3c2ff83e36",
   "metadata": {},
   "outputs": [],
   "source": [
    "### Merge with ReMap results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "id": "60ff3520-ffac-4660-bb4b-9744db63c309",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 2 × 13</caption>\n",
       "<thead>\n",
       "\t<tr><th></th><th scope=col>Cell.type</th><th scope=col>eQTL..SNP.eGene.</th><th scope=col>TF</th><th scope=col>TF.is.a.co.eGene.</th><th scope=col>enrichment.p.value</th><th scope=col>X..TF.overlap...co.eGene</th><th scope=col>X..TF.overlap...background</th><th scope=col>X..no.TF.overlap...co.eGene</th><th scope=col>X..background.gene...not.co.eGene</th><th scope=col>enrichment.fdr</th><th scope=col>eQTL.SNP</th><th scope=col>SNP.overlaps.TF.</th><th scope=col>Names.of.overlapping.SNPs</th></tr>\n",
       "\t<tr><th></th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>1</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>CDK8   </td><td>FALSE</td><td>9.630369e-06</td><td>14</td><td>5</td><td>2778</td><td> 8515</td><td>1.640373e-03</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "\t<tr><th scope=row>2</th><td>CD4T</td><td>rs111454690_HLA-DRB5</td><td>SNRNP70</td><td>FALSE</td><td>1.209254e-09</td><td>11</td><td>8</td><td> 649</td><td>10644</td><td>6.179288e-07</td><td>rs111454690</td><td>FALSE</td><td></td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 2 × 13\n",
       "\\begin{tabular}{r|lllllllllllll}\n",
       "  & Cell.type & eQTL..SNP.eGene. & TF & TF.is.a.co.eGene. & enrichment.p.value & X..TF.overlap...co.eGene & X..TF.overlap...background & X..no.TF.overlap...co.eGene & X..background.gene...not.co.eGene & enrichment.fdr & eQTL.SNP & SNP.overlaps.TF. & Names.of.overlapping.SNPs\\\\\n",
       "  & <chr> & <chr> & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <int> & <dbl> & <chr> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t1 & CD4T & rs111454690\\_HLA-DRB5 & CDK8    & FALSE & 9.630369e-06 & 14 & 5 & 2778 &  8515 & 1.640373e-03 & rs111454690 & FALSE & \\\\\n",
       "\t2 & CD4T & rs111454690\\_HLA-DRB5 & SNRNP70 & FALSE & 1.209254e-09 & 11 & 8 &  649 & 10644 & 6.179288e-07 & rs111454690 & FALSE & \\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 2 × 13\n",
       "\n",
       "| <!--/--> | Cell.type &lt;chr&gt; | eQTL..SNP.eGene. &lt;chr&gt; | TF &lt;chr&gt; | TF.is.a.co.eGene. &lt;lgl&gt; | enrichment.p.value &lt;dbl&gt; | X..TF.overlap...co.eGene &lt;int&gt; | X..TF.overlap...background &lt;int&gt; | X..no.TF.overlap...co.eGene &lt;int&gt; | X..background.gene...not.co.eGene &lt;int&gt; | enrichment.fdr &lt;dbl&gt; | eQTL.SNP &lt;chr&gt; | SNP.overlaps.TF. &lt;lgl&gt; | Names.of.overlapping.SNPs &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| 1 | CD4T | rs111454690_HLA-DRB5 | CDK8    | FALSE | 9.630369e-06 | 14 | 5 | 2778 |  8515 | 1.640373e-03 | rs111454690 | FALSE | <!----> |\n",
       "| 2 | CD4T | rs111454690_HLA-DRB5 | SNRNP70 | FALSE | 1.209254e-09 | 11 | 8 |  649 | 10644 | 6.179288e-07 | rs111454690 | FALSE | <!----> |\n",
       "\n"
      ],
      "text/plain": [
       "  Cell.type eQTL..SNP.eGene.     TF      TF.is.a.co.eGene. enrichment.p.value\n",
       "1 CD4T      rs111454690_HLA-DRB5 CDK8    FALSE             9.630369e-06      \n",
       "2 CD4T      rs111454690_HLA-DRB5 SNRNP70 FALSE             1.209254e-09      \n",
       "  X..TF.overlap...co.eGene X..TF.overlap...background\n",
       "1 14                       5                         \n",
       "2 11                       8                         \n",
       "  X..no.TF.overlap...co.eGene X..background.gene...not.co.eGene enrichment.fdr\n",
       "1 2778                         8515                             1.640373e-03  \n",
       "2  649                        10644                             6.179288e-07  \n",
       "  eQTL.SNP    SNP.overlaps.TF. Names.of.overlapping.SNPs\n",
       "1 rs111454690 FALSE                                     \n",
       "2 rs111454690 FALSE                                     "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview the first two rows of the ReMap enrichment table\n",
    "head(x = old_enrichments, n = 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "id": "1e378133-c45d-4ca4-bfe0-b6b89e2dd7f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prefix every column with 'ReMap' (no separator: the merge keys used later rely on these exact names).\n",
    "# Any existing 'ReMap' prefix is stripped first, so re-running this cell does not stack prefixes.\n",
    "colnames(old_enrichments) = paste0('ReMap', sub('^ReMap', '', colnames(old_enrichments)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "id": "2f103548-f736-4a81-ba6c-af85ac4da9c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join TRANSFAC and ReMap enrichments on (cell type, SNP_eGene, TF).\n",
    "# merge() keeps only key combinations present in both tables (default all = FALSE).\n",
    "combined = merge(\n",
    "    x = enrichment,\n",
    "    y = old_enrichments,\n",
    "    by.x = c('TRANSFAC_cell_type', 'TRANSFAC_snp_eGene', 'TRANSFAC_tf'),\n",
    "    by.y = c('ReMapCell.type', 'ReMapeQTL..SNP.eGene.', 'ReMapTF')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "id": "2b4a65ee-ecf1-4d93-9718-88b5f9f49d20",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "31"
      ],
      "text/latex": [
       "31"
      ],
      "text/markdown": [
       "31"
      ],
      "text/plain": [
       "[1] 31"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Number of rows in the merged TRANSFAC/ReMap table\n",
    "nrow(x = combined)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "id": "0ae3f173-266e-4720-bea0-fbd297c726d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".list-inline {list-style: none; margin:0; padding: 0}\n",
       ".list-inline>li {display: inline-block}\n",
       ".list-inline>li:not(:last-child)::after {content: \"\\00b7\"; padding: 0 .5ex}\n",
       "</style>\n",
       "<ol class=list-inline><li>'CEBPD'</li><li>'CEBPB'</li><li>'ELK1'</li><li>'FLI1'</li><li>'HOXA9'</li></ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'CEBPD'\n",
       "\\item 'CEBPB'\n",
       "\\item 'ELK1'\n",
       "\\item 'FLI1'\n",
       "\\item 'HOXA9'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'CEBPD'\n",
       "2. 'CEBPB'\n",
       "3. 'ELK1'\n",
       "4. 'FLI1'\n",
       "5. 'HOXA9'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"CEBPD\" \"CEBPB\" \"ELK1\"  \"FLI1\"  \"HOXA9\""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distinct values of the TRANSFAC_tf column in the merged table\n",
    "unique(combined[['TRANSFAC_tf']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "id": "4924d056-d88a-4dcc-b21d-4f42f2365c10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"dataframe\">\n",
       "<caption>A data.frame: 31 × 26</caption>\n",
       "<thead>\n",
       "\t<tr><th scope=col>TRANSFAC_cell_type</th><th scope=col>TRANSFAC_snp_eGene</th><th scope=col>TRANSFAC_tf</th><th scope=col>TRANSFAC_query</th><th scope=col>TRANSFAC_significant</th><th scope=col>TRANSFAC_p_value</th><th scope=col>TRANSFAC_term_size</th><th scope=col>TRANSFAC_query_size</th><th scope=col>TRANSFAC_intersection_size</th><th scope=col>TRANSFAC_precision</th><th scope=col>⋯</th><th scope=col>ReMapTF.is.a.co.eGene.</th><th scope=col>ReMapenrichment.p.value</th><th scope=col>ReMapX..TF.overlap...co.eGene</th><th scope=col>ReMapX..TF.overlap...background</th><th scope=col>ReMapX..no.TF.overlap...co.eGene</th><th scope=col>ReMapX..background.gene...not.co.eGene</th><th scope=col>ReMapenrichment.fdr</th><th scope=col>ReMapeQTL.SNP</th><th scope=col>ReMapSNP.overlaps.TF.</th><th scope=col>ReMapNames.of.overlapping.SNPs</th></tr>\n",
       "\t<tr><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>⋯</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;int&gt;</th><th scope=col>&lt;dbl&gt;</th><th scope=col>&lt;chr&gt;</th><th scope=col>&lt;lgl&gt;</th><th scope=col>&lt;chr&gt;</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "\t<tr><td>B       </td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.028703406</td><td> 501</td><td> 35</td><td> 23</td><td>0.6571429</td><td>⋯</td><td>FALSE</td><td>3.034184e-06</td><td> 34</td><td>  1</td><td>1096</td><td>  632</td><td>1.107477e-04</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.028742923</td><td>2503</td><td>191</td><td> 70</td><td>0.3664921</td><td>⋯</td><td>FALSE</td><td>2.421193e-05</td><td>159</td><td> 41</td><td>7460</td><td> 3833</td><td>2.877279e-04</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.041618610</td><td>1598</td><td>191</td><td> 49</td><td>0.2565445</td><td>⋯</td><td>FALSE</td><td>2.421193e-05</td><td>159</td><td> 41</td><td>7460</td><td> 3833</td><td>2.877279e-04</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.017941417</td><td>2826</td><td>191</td><td> 78</td><td>0.4083770</td><td>⋯</td><td>FALSE</td><td>2.421193e-05</td><td>159</td><td> 41</td><td>7460</td><td> 3833</td><td>2.877279e-04</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.041618610</td><td>1464</td><td>191</td><td> 46</td><td>0.2408377</td><td>⋯</td><td>FALSE</td><td>7.264122e-05</td><td>133</td><td> 67</td><td>5970</td><td> 5323</td><td>7.423933e-04</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>ELK1 </td><td>query_1</td><td>TRUE</td><td>0.007142316</td><td>4467</td><td>191</td><td>113</td><td>0.5916230</td><td>⋯</td><td>FALSE</td><td>7.289311e-04</td><td> 94</td><td>106</td><td>4030</td><td> 7263</td><td>5.173386e-03</td><td>rs1131017</td><td> TRUE</td><td>rs10876864         </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>FLI1 </td><td>query_1</td><td>TRUE</td><td>0.024172382</td><td>2207</td><td>191</td><td> 64</td><td>0.3350785</td><td>⋯</td><td>FALSE</td><td>9.006657e-03</td><td>131</td><td> 69</td><td>6433</td><td> 4860</td><td>4.002088e-02</td><td>rs1131017</td><td> TRUE</td><td>rs1131017          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>FLI1 </td><td>query_1</td><td>TRUE</td><td>0.047267440</td><td>3060</td><td>191</td><td> 80</td><td>0.4188482</td><td>⋯</td><td>FALSE</td><td>9.006657e-03</td><td>131</td><td> 69</td><td>6433</td><td> 4860</td><td>4.002088e-02</td><td>rs1131017</td><td> TRUE</td><td>rs1131017          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>FLI1 </td><td>query_1</td><td>TRUE</td><td>0.041618610</td><td>2325</td><td>191</td><td> 65</td><td>0.3403141</td><td>⋯</td><td>FALSE</td><td>9.006657e-03</td><td>131</td><td> 69</td><td>6433</td><td> 4860</td><td>4.002088e-02</td><td>rs1131017</td><td> TRUE</td><td>rs1131017          </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>HOXA9</td><td>query_1</td><td>TRUE</td><td>0.002470867</td><td>3285</td><td>191</td><td> 95</td><td>0.4973822</td><td>⋯</td><td>FALSE</td><td>1.610430e-05</td><td> 20</td><td>180</td><td> 372</td><td>10921</td><td>2.083458e-04</td><td>rs1131017</td><td>FALSE</td><td>                   </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>HOXA9</td><td>query_1</td><td>TRUE</td><td>0.007142316</td><td> 732</td><td>191</td><td> 31</td><td>0.1623037</td><td>⋯</td><td>FALSE</td><td>1.610430e-05</td><td> 20</td><td>180</td><td> 372</td><td>10921</td><td>2.083458e-04</td><td>rs1131017</td><td>FALSE</td><td>                   </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs1131017_RPS26_positive</td><td>HOXA9</td><td>query_1</td><td>TRUE</td><td>0.004188494</td><td>1124</td><td>191</td><td> 43</td><td>0.2251309</td><td>⋯</td><td>FALSE</td><td>1.610430e-05</td><td> 20</td><td>180</td><td> 372</td><td>10921</td><td>2.083458e-04</td><td>rs1131017</td><td>FALSE</td><td>                   </td></tr>\n",
       "\t<tr><td>CD4T    </td><td>rs7605824_SH3YL1        </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.022537569</td><td>3025</td><td> 20</td><td> 16</td><td>0.8000000</td><td>⋯</td><td>FALSE</td><td>2.865087e-03</td><td> 17</td><td>  3</td><td>5970</td><td> 5323</td><td>3.327408e-02</td><td>rs7605824</td><td>FALSE</td><td>                   </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.036312693</td><td>3140</td><td>126</td><td> 65</td><td>0.5158730</td><td>⋯</td><td>FALSE</td><td>4.198463e-03</td><td>106</td><td> 26</td><td>6655</td><td> 2899</td><td>1.968270e-02</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.029926625</td><td>1416</td><td>126</td><td> 37</td><td>0.2936508</td><td>⋯</td><td>FALSE</td><td>4.198463e-03</td><td>106</td><td> 26</td><td>6655</td><td> 2899</td><td>1.968270e-02</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.009638736</td><td>2201</td><td>126</td><td> 55</td><td>0.4365079</td><td>⋯</td><td>FALSE</td><td>4.198463e-03</td><td>106</td><td> 26</td><td>6655</td><td> 2899</td><td>1.968270e-02</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.009638736</td><td>2479</td><td>126</td><td> 59</td><td>0.4682540</td><td>⋯</td><td>FALSE</td><td>4.198463e-03</td><td>106</td><td> 26</td><td>6655</td><td> 2899</td><td>1.968270e-02</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.022933498</td><td>1622</td><td>126</td><td> 42</td><td>0.3333333</td><td>⋯</td><td>FALSE</td><td>4.198463e-03</td><td>106</td><td> 26</td><td>6655</td><td> 2899</td><td>1.968270e-02</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.049065326</td><td>2296</td><td>126</td><td> 51</td><td>0.4047619</td><td>⋯</td><td>FALSE</td><td>1.611238e-05</td><td> 97</td><td> 35</td><td>5295</td><td> 4259</td><td>1.960339e-04</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.026972058</td><td>1316</td><td>126</td><td> 36</td><td>0.2857143</td><td>⋯</td><td>FALSE</td><td>1.611238e-05</td><td> 97</td><td> 35</td><td>5295</td><td> 4259</td><td>1.960339e-04</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.028679873</td><td>2564</td><td>126</td><td> 57</td><td>0.4523810</td><td>⋯</td><td>FALSE</td><td>1.611238e-05</td><td> 97</td><td> 35</td><td>5295</td><td> 4259</td><td>1.960339e-04</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>ELK1 </td><td>query_1</td><td>TRUE</td><td>0.028679873</td><td>4261</td><td>126</td><td> 82</td><td>0.6507937</td><td>⋯</td><td>FALSE</td><td>8.317368e-05</td><td> 71</td><td> 61</td><td>3550</td><td> 6004</td><td>8.019198e-04</td><td>rs1131017</td><td> TRUE</td><td>rs10876864         </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>ELK1 </td><td>query_1</td><td>TRUE</td><td>0.049065326</td><td>3932</td><td>126</td><td> 76</td><td>0.6031746</td><td>⋯</td><td>FALSE</td><td>8.317368e-05</td><td> 71</td><td> 61</td><td>3550</td><td> 6004</td><td>8.019198e-04</td><td>rs1131017</td><td> TRUE</td><td>rs10876864         </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>ELK1 </td><td>query_1</td><td>TRUE</td><td>0.044794472</td><td>2973</td><td>126</td><td> 62</td><td>0.4920635</td><td>⋯</td><td>FALSE</td><td>8.317368e-05</td><td> 71</td><td> 61</td><td>3550</td><td> 6004</td><td>8.019198e-04</td><td>rs1131017</td><td> TRUE</td><td>rs10876864         </td></tr>\n",
       "\t<tr><td>monocyte</td><td>rs1131017_RPS26         </td><td>FLI1 </td><td>query_1</td><td>TRUE</td><td>0.036045611</td><td>1951</td><td>126</td><td> 46</td><td>0.3650794</td><td>⋯</td><td>FALSE</td><td>4.430257e-04</td><td> 97</td><td> 35</td><td>5648</td><td> 3906</td><td>3.144252e-03</td><td>rs1131017</td><td> TRUE</td><td>rs1131017          </td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.006603932</td><td>1894</td><td> 94</td><td> 48</td><td>0.5106383</td><td>⋯</td><td>FALSE</td><td>1.566936e-03</td><td> 80</td><td> 16</td><td>5050</td><td> 2217</td><td>8.428465e-03</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.031154971</td><td>1076</td><td> 94</td><td> 30</td><td>0.3191489</td><td>⋯</td><td>FALSE</td><td>1.566936e-03</td><td> 80</td><td> 16</td><td>5050</td><td> 2217</td><td>8.428465e-03</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.016996993</td><td>1249</td><td> 94</td><td> 34</td><td>0.3617021</td><td>⋯</td><td>FALSE</td><td>1.566936e-03</td><td> 80</td><td> 16</td><td>5050</td><td> 2217</td><td>8.428465e-03</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPB</td><td>query_1</td><td>TRUE</td><td>0.007729787</td><td>1684</td><td> 94</td><td> 43</td><td>0.4574468</td><td>⋯</td><td>FALSE</td><td>1.566936e-03</td><td> 80</td><td> 16</td><td>5050</td><td> 2217</td><td>8.428465e-03</td><td>rs1131017</td><td> TRUE</td><td>rs7297175          </td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.033634713</td><td> 981</td><td> 94</td><td> 28</td><td>0.2978723</td><td>⋯</td><td>FALSE</td><td>5.531751e-07</td><td> 78</td><td> 18</td><td>4147</td><td> 3120</td><td>8.833515e-06</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "\t<tr><td>NK      </td><td>rs1131017_RPS26         </td><td>CEBPD</td><td>query_1</td><td>TRUE</td><td>0.030378248</td><td>1957</td><td> 94</td><td> 45</td><td>0.4787234</td><td>⋯</td><td>FALSE</td><td>5.531751e-07</td><td> 78</td><td> 18</td><td>4147</td><td> 3120</td><td>8.833515e-06</td><td>rs1131017</td><td> TRUE</td><td>rs1131017,rs7297175</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "A data.frame: 31 × 26\n",
       "\\begin{tabular}{lllllllllllllllllllll}\n",
       " TRANSFAC\\_cell\\_type & TRANSFAC\\_snp\\_eGene & TRANSFAC\\_tf & TRANSFAC\\_query & TRANSFAC\\_significant & TRANSFAC\\_p\\_value & TRANSFAC\\_term\\_size & TRANSFAC\\_query\\_size & TRANSFAC\\_intersection\\_size & TRANSFAC\\_precision & ⋯ & ReMapTF.is.a.co.eGene. & ReMapenrichment.p.value & ReMapX..TF.overlap...co.eGene & ReMapX..TF.overlap...background & ReMapX..no.TF.overlap...co.eGene & ReMapX..background.gene...not.co.eGene & ReMapenrichment.fdr & ReMapeQTL.SNP & ReMapSNP.overlaps.TF. & ReMapNames.of.overlapping.SNPs\\\\\n",
       " <chr> & <chr> & <chr> & <chr> & <lgl> & <dbl> & <int> & <int> & <int> & <dbl> & ⋯ & <lgl> & <dbl> & <int> & <int> & <int> & <int> & <dbl> & <chr> & <lgl> & <chr>\\\\\n",
       "\\hline\n",
       "\t B        & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.028703406 &  501 &  35 &  23 & 0.6571429 & ⋯ & FALSE & 3.034184e-06 &  34 &   1 & 1096 &   632 & 1.107477e-04 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & CEBPB & query\\_1 & TRUE & 0.028742923 & 2503 & 191 &  70 & 0.3664921 & ⋯ & FALSE & 2.421193e-05 & 159 &  41 & 7460 &  3833 & 2.877279e-04 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & CEBPB & query\\_1 & TRUE & 0.041618610 & 1598 & 191 &  49 & 0.2565445 & ⋯ & FALSE & 2.421193e-05 & 159 &  41 & 7460 &  3833 & 2.877279e-04 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & CEBPB & query\\_1 & TRUE & 0.017941417 & 2826 & 191 &  78 & 0.4083770 & ⋯ & FALSE & 2.421193e-05 & 159 &  41 & 7460 &  3833 & 2.877279e-04 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & CEBPD & query\\_1 & TRUE & 0.041618610 & 1464 & 191 &  46 & 0.2408377 & ⋯ & FALSE & 7.264122e-05 & 133 &  67 & 5970 &  5323 & 7.423933e-04 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & ELK1  & query\\_1 & TRUE & 0.007142316 & 4467 & 191 & 113 & 0.5916230 & ⋯ & FALSE & 7.289311e-04 &  94 & 106 & 4030 &  7263 & 5.173386e-03 & rs1131017 &  TRUE & rs10876864         \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & FLI1  & query\\_1 & TRUE & 0.024172382 & 2207 & 191 &  64 & 0.3350785 & ⋯ & FALSE & 9.006657e-03 & 131 &  69 & 6433 &  4860 & 4.002088e-02 & rs1131017 &  TRUE & rs1131017          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & FLI1  & query\\_1 & TRUE & 0.047267440 & 3060 & 191 &  80 & 0.4188482 & ⋯ & FALSE & 9.006657e-03 & 131 &  69 & 6433 &  4860 & 4.002088e-02 & rs1131017 &  TRUE & rs1131017          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & FLI1  & query\\_1 & TRUE & 0.041618610 & 2325 & 191 &  65 & 0.3403141 & ⋯ & FALSE & 9.006657e-03 & 131 &  69 & 6433 &  4860 & 4.002088e-02 & rs1131017 &  TRUE & rs1131017          \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & HOXA9 & query\\_1 & TRUE & 0.002470867 & 3285 & 191 &  95 & 0.4973822 & ⋯ & FALSE & 1.610430e-05 &  20 & 180 &  372 & 10921 & 2.083458e-04 & rs1131017 & FALSE &                    \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & HOXA9 & query\\_1 & TRUE & 0.007142316 &  732 & 191 &  31 & 0.1623037 & ⋯ & FALSE & 1.610430e-05 &  20 & 180 &  372 & 10921 & 2.083458e-04 & rs1131017 & FALSE &                    \\\\\n",
       "\t CD4T     & rs1131017\\_RPS26\\_positive & HOXA9 & query\\_1 & TRUE & 0.004188494 & 1124 & 191 &  43 & 0.2251309 & ⋯ & FALSE & 1.610430e-05 &  20 & 180 &  372 & 10921 & 2.083458e-04 & rs1131017 & FALSE &                    \\\\\n",
       "\t CD4T     & rs7605824\\_SH3YL1         & CEBPD & query\\_1 & TRUE & 0.022537569 & 3025 &  20 &  16 & 0.8000000 & ⋯ & FALSE & 2.865087e-03 &  17 &   3 & 5970 &  5323 & 3.327408e-02 & rs7605824 & FALSE &                    \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.036312693 & 3140 & 126 &  65 & 0.5158730 & ⋯ & FALSE & 4.198463e-03 & 106 &  26 & 6655 &  2899 & 1.968270e-02 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.029926625 & 1416 & 126 &  37 & 0.2936508 & ⋯ & FALSE & 4.198463e-03 & 106 &  26 & 6655 &  2899 & 1.968270e-02 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.009638736 & 2201 & 126 &  55 & 0.4365079 & ⋯ & FALSE & 4.198463e-03 & 106 &  26 & 6655 &  2899 & 1.968270e-02 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.009638736 & 2479 & 126 &  59 & 0.4682540 & ⋯ & FALSE & 4.198463e-03 & 106 &  26 & 6655 &  2899 & 1.968270e-02 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.022933498 & 1622 & 126 &  42 & 0.3333333 & ⋯ & FALSE & 4.198463e-03 & 106 &  26 & 6655 &  2899 & 1.968270e-02 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.049065326 & 2296 & 126 &  51 & 0.4047619 & ⋯ & FALSE & 1.611238e-05 &  97 &  35 & 5295 &  4259 & 1.960339e-04 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.026972058 & 1316 & 126 &  36 & 0.2857143 & ⋯ & FALSE & 1.611238e-05 &  97 &  35 & 5295 &  4259 & 1.960339e-04 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.028679873 & 2564 & 126 &  57 & 0.4523810 & ⋯ & FALSE & 1.611238e-05 &  97 &  35 & 5295 &  4259 & 1.960339e-04 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & ELK1  & query\\_1 & TRUE & 0.028679873 & 4261 & 126 &  82 & 0.6507937 & ⋯ & FALSE & 8.317368e-05 &  71 &  61 & 3550 &  6004 & 8.019198e-04 & rs1131017 &  TRUE & rs10876864         \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & ELK1  & query\\_1 & TRUE & 0.049065326 & 3932 & 126 &  76 & 0.6031746 & ⋯ & FALSE & 8.317368e-05 &  71 &  61 & 3550 &  6004 & 8.019198e-04 & rs1131017 &  TRUE & rs10876864         \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & ELK1  & query\\_1 & TRUE & 0.044794472 & 2973 & 126 &  62 & 0.4920635 & ⋯ & FALSE & 8.317368e-05 &  71 &  61 & 3550 &  6004 & 8.019198e-04 & rs1131017 &  TRUE & rs10876864         \\\\\n",
       "\t monocyte & rs1131017\\_RPS26          & FLI1  & query\\_1 & TRUE & 0.036045611 & 1951 & 126 &  46 & 0.3650794 & ⋯ & FALSE & 4.430257e-04 &  97 &  35 & 5648 &  3906 & 3.144252e-03 & rs1131017 &  TRUE & rs1131017          \\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.006603932 & 1894 &  94 &  48 & 0.5106383 & ⋯ & FALSE & 1.566936e-03 &  80 &  16 & 5050 &  2217 & 8.428465e-03 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.031154971 & 1076 &  94 &  30 & 0.3191489 & ⋯ & FALSE & 1.566936e-03 &  80 &  16 & 5050 &  2217 & 8.428465e-03 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.016996993 & 1249 &  94 &  34 & 0.3617021 & ⋯ & FALSE & 1.566936e-03 &  80 &  16 & 5050 &  2217 & 8.428465e-03 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPB & query\\_1 & TRUE & 0.007729787 & 1684 &  94 &  43 & 0.4574468 & ⋯ & FALSE & 1.566936e-03 &  80 &  16 & 5050 &  2217 & 8.428465e-03 & rs1131017 &  TRUE & rs7297175          \\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.033634713 &  981 &  94 &  28 & 0.2978723 & ⋯ & FALSE & 5.531751e-07 &  78 &  18 & 4147 &  3120 & 8.833515e-06 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\t NK       & rs1131017\\_RPS26          & CEBPD & query\\_1 & TRUE & 0.030378248 & 1957 &  94 &  45 & 0.4787234 & ⋯ & FALSE & 5.531751e-07 &  78 &  18 & 4147 &  3120 & 8.833515e-06 & rs1131017 &  TRUE & rs1131017,rs7297175\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "A data.frame: 31 × 26\n",
       "\n",
       "| TRANSFAC_cell_type &lt;chr&gt; | TRANSFAC_snp_eGene &lt;chr&gt; | TRANSFAC_tf &lt;chr&gt; | TRANSFAC_query &lt;chr&gt; | TRANSFAC_significant &lt;lgl&gt; | TRANSFAC_p_value &lt;dbl&gt; | TRANSFAC_term_size &lt;int&gt; | TRANSFAC_query_size &lt;int&gt; | TRANSFAC_intersection_size &lt;int&gt; | TRANSFAC_precision &lt;dbl&gt; | ⋯ ⋯ | ReMapTF.is.a.co.eGene. &lt;lgl&gt; | ReMapenrichment.p.value &lt;dbl&gt; | ReMapX..TF.overlap...co.eGene &lt;int&gt; | ReMapX..TF.overlap...background &lt;int&gt; | ReMapX..no.TF.overlap...co.eGene &lt;int&gt; | ReMapX..background.gene...not.co.eGene &lt;int&gt; | ReMapenrichment.fdr &lt;dbl&gt; | ReMapeQTL.SNP &lt;chr&gt; | ReMapSNP.overlaps.TF. &lt;lgl&gt; | ReMapNames.of.overlapping.SNPs &lt;chr&gt; |\n",
       "|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|\n",
       "| B        | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.028703406 |  501 |  35 |  23 | 0.6571429 | ⋯ | FALSE | 3.034184e-06 |  34 |   1 | 1096 |   632 | 1.107477e-04 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| CD4T     | rs1131017_RPS26_positive | CEBPB | query_1 | TRUE | 0.028742923 | 2503 | 191 |  70 | 0.3664921 | ⋯ | FALSE | 2.421193e-05 | 159 |  41 | 7460 |  3833 | 2.877279e-04 | rs1131017 |  TRUE | rs7297175           |\n",
       "| CD4T     | rs1131017_RPS26_positive | CEBPB | query_1 | TRUE | 0.041618610 | 1598 | 191 |  49 | 0.2565445 | ⋯ | FALSE | 2.421193e-05 | 159 |  41 | 7460 |  3833 | 2.877279e-04 | rs1131017 |  TRUE | rs7297175           |\n",
       "| CD4T     | rs1131017_RPS26_positive | CEBPB | query_1 | TRUE | 0.017941417 | 2826 | 191 |  78 | 0.4083770 | ⋯ | FALSE | 2.421193e-05 | 159 |  41 | 7460 |  3833 | 2.877279e-04 | rs1131017 |  TRUE | rs7297175           |\n",
       "| CD4T     | rs1131017_RPS26_positive | CEBPD | query_1 | TRUE | 0.041618610 | 1464 | 191 |  46 | 0.2408377 | ⋯ | FALSE | 7.264122e-05 | 133 |  67 | 5970 |  5323 | 7.423933e-04 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| CD4T     | rs1131017_RPS26_positive | ELK1  | query_1 | TRUE | 0.007142316 | 4467 | 191 | 113 | 0.5916230 | ⋯ | FALSE | 7.289311e-04 |  94 | 106 | 4030 |  7263 | 5.173386e-03 | rs1131017 |  TRUE | rs10876864          |\n",
       "| CD4T     | rs1131017_RPS26_positive | FLI1  | query_1 | TRUE | 0.024172382 | 2207 | 191 |  64 | 0.3350785 | ⋯ | FALSE | 9.006657e-03 | 131 |  69 | 6433 |  4860 | 4.002088e-02 | rs1131017 |  TRUE | rs1131017           |\n",
       "| CD4T     | rs1131017_RPS26_positive | FLI1  | query_1 | TRUE | 0.047267440 | 3060 | 191 |  80 | 0.4188482 | ⋯ | FALSE | 9.006657e-03 | 131 |  69 | 6433 |  4860 | 4.002088e-02 | rs1131017 |  TRUE | rs1131017           |\n",
       "| CD4T     | rs1131017_RPS26_positive | FLI1  | query_1 | TRUE | 0.041618610 | 2325 | 191 |  65 | 0.3403141 | ⋯ | FALSE | 9.006657e-03 | 131 |  69 | 6433 |  4860 | 4.002088e-02 | rs1131017 |  TRUE | rs1131017           |\n",
       "| CD4T     | rs1131017_RPS26_positive | HOXA9 | query_1 | TRUE | 0.002470867 | 3285 | 191 |  95 | 0.4973822 | ⋯ | FALSE | 1.610430e-05 |  20 | 180 |  372 | 10921 | 2.083458e-04 | rs1131017 | FALSE | <!----> |\n",
       "| CD4T     | rs1131017_RPS26_positive | HOXA9 | query_1 | TRUE | 0.007142316 |  732 | 191 |  31 | 0.1623037 | ⋯ | FALSE | 1.610430e-05 |  20 | 180 |  372 | 10921 | 2.083458e-04 | rs1131017 | FALSE | <!----> |\n",
       "| CD4T     | rs1131017_RPS26_positive | HOXA9 | query_1 | TRUE | 0.004188494 | 1124 | 191 |  43 | 0.2251309 | ⋯ | FALSE | 1.610430e-05 |  20 | 180 |  372 | 10921 | 2.083458e-04 | rs1131017 | FALSE | <!----> |\n",
       "| CD4T     | rs7605824_SH3YL1         | CEBPD | query_1 | TRUE | 0.022537569 | 3025 |  20 |  16 | 0.8000000 | ⋯ | FALSE | 2.865087e-03 |  17 |   3 | 5970 |  5323 | 3.327408e-02 | rs7605824 | FALSE | <!----> |\n",
       "| monocyte | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.036312693 | 3140 | 126 |  65 | 0.5158730 | ⋯ | FALSE | 4.198463e-03 | 106 |  26 | 6655 |  2899 | 1.968270e-02 | rs1131017 |  TRUE | rs7297175           |\n",
       "| monocyte | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.029926625 | 1416 | 126 |  37 | 0.2936508 | ⋯ | FALSE | 4.198463e-03 | 106 |  26 | 6655 |  2899 | 1.968270e-02 | rs1131017 |  TRUE | rs7297175           |\n",
       "| monocyte | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.009638736 | 2201 | 126 |  55 | 0.4365079 | ⋯ | FALSE | 4.198463e-03 | 106 |  26 | 6655 |  2899 | 1.968270e-02 | rs1131017 |  TRUE | rs7297175           |\n",
       "| monocyte | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.009638736 | 2479 | 126 |  59 | 0.4682540 | ⋯ | FALSE | 4.198463e-03 | 106 |  26 | 6655 |  2899 | 1.968270e-02 | rs1131017 |  TRUE | rs7297175           |\n",
       "| monocyte | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.022933498 | 1622 | 126 |  42 | 0.3333333 | ⋯ | FALSE | 4.198463e-03 | 106 |  26 | 6655 |  2899 | 1.968270e-02 | rs1131017 |  TRUE | rs7297175           |\n",
       "| monocyte | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.049065326 | 2296 | 126 |  51 | 0.4047619 | ⋯ | FALSE | 1.611238e-05 |  97 |  35 | 5295 |  4259 | 1.960339e-04 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| monocyte | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.026972058 | 1316 | 126 |  36 | 0.2857143 | ⋯ | FALSE | 1.611238e-05 |  97 |  35 | 5295 |  4259 | 1.960339e-04 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| monocyte | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.028679873 | 2564 | 126 |  57 | 0.4523810 | ⋯ | FALSE | 1.611238e-05 |  97 |  35 | 5295 |  4259 | 1.960339e-04 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| monocyte | rs1131017_RPS26          | ELK1  | query_1 | TRUE | 0.028679873 | 4261 | 126 |  82 | 0.6507937 | ⋯ | FALSE | 8.317368e-05 |  71 |  61 | 3550 |  6004 | 8.019198e-04 | rs1131017 |  TRUE | rs10876864          |\n",
       "| monocyte | rs1131017_RPS26          | ELK1  | query_1 | TRUE | 0.049065326 | 3932 | 126 |  76 | 0.6031746 | ⋯ | FALSE | 8.317368e-05 |  71 |  61 | 3550 |  6004 | 8.019198e-04 | rs1131017 |  TRUE | rs10876864          |\n",
       "| monocyte | rs1131017_RPS26          | ELK1  | query_1 | TRUE | 0.044794472 | 2973 | 126 |  62 | 0.4920635 | ⋯ | FALSE | 8.317368e-05 |  71 |  61 | 3550 |  6004 | 8.019198e-04 | rs1131017 |  TRUE | rs10876864          |\n",
       "| monocyte | rs1131017_RPS26          | FLI1  | query_1 | TRUE | 0.036045611 | 1951 | 126 |  46 | 0.3650794 | ⋯ | FALSE | 4.430257e-04 |  97 |  35 | 5648 |  3906 | 3.144252e-03 | rs1131017 |  TRUE | rs1131017           |\n",
       "| NK       | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.006603932 | 1894 |  94 |  48 | 0.5106383 | ⋯ | FALSE | 1.566936e-03 |  80 |  16 | 5050 |  2217 | 8.428465e-03 | rs1131017 |  TRUE | rs7297175           |\n",
       "| NK       | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.031154971 | 1076 |  94 |  30 | 0.3191489 | ⋯ | FALSE | 1.566936e-03 |  80 |  16 | 5050 |  2217 | 8.428465e-03 | rs1131017 |  TRUE | rs7297175           |\n",
       "| NK       | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.016996993 | 1249 |  94 |  34 | 0.3617021 | ⋯ | FALSE | 1.566936e-03 |  80 |  16 | 5050 |  2217 | 8.428465e-03 | rs1131017 |  TRUE | rs7297175           |\n",
       "| NK       | rs1131017_RPS26          | CEBPB | query_1 | TRUE | 0.007729787 | 1684 |  94 |  43 | 0.4574468 | ⋯ | FALSE | 1.566936e-03 |  80 |  16 | 5050 |  2217 | 8.428465e-03 | rs1131017 |  TRUE | rs7297175           |\n",
       "| NK       | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.033634713 |  981 |  94 |  28 | 0.2978723 | ⋯ | FALSE | 5.531751e-07 |  78 |  18 | 4147 |  3120 | 8.833515e-06 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "| NK       | rs1131017_RPS26          | CEBPD | query_1 | TRUE | 0.030378248 | 1957 |  94 |  45 | 0.4787234 | ⋯ | FALSE | 5.531751e-07 |  78 |  18 | 4147 |  3120 | 8.833515e-06 | rs1131017 |  TRUE | rs1131017,rs7297175 |\n",
       "\n"
      ],
      "text/plain": [
       "   TRANSFAC_cell_type TRANSFAC_snp_eGene       TRANSFAC_tf TRANSFAC_query\n",
       "1  B                  rs1131017_RPS26          CEBPD       query_1       \n",
       "2  CD4T               rs1131017_RPS26_positive CEBPB       query_1       \n",
       "3  CD4T               rs1131017_RPS26_positive CEBPB       query_1       \n",
       "4  CD4T               rs1131017_RPS26_positive CEBPB       query_1       \n",
       "5  CD4T               rs1131017_RPS26_positive CEBPD       query_1       \n",
       "6  CD4T               rs1131017_RPS26_positive ELK1        query_1       \n",
       "7  CD4T               rs1131017_RPS26_positive FLI1        query_1       \n",
       "8  CD4T               rs1131017_RPS26_positive FLI1        query_1       \n",
       "9  CD4T               rs1131017_RPS26_positive FLI1        query_1       \n",
       "10 CD4T               rs1131017_RPS26_positive HOXA9       query_1       \n",
       "11 CD4T               rs1131017_RPS26_positive HOXA9       query_1       \n",
       "12 CD4T               rs1131017_RPS26_positive HOXA9       query_1       \n",
       "13 CD4T               rs7605824_SH3YL1         CEBPD       query_1       \n",
       "14 monocyte           rs1131017_RPS26          CEBPB       query_1       \n",
       "15 monocyte           rs1131017_RPS26          CEBPB       query_1       \n",
       "16 monocyte           rs1131017_RPS26          CEBPB       query_1       \n",
       "17 monocyte           rs1131017_RPS26          CEBPB       query_1       \n",
       "18 monocyte           rs1131017_RPS26          CEBPB       query_1       \n",
       "19 monocyte           rs1131017_RPS26          CEBPD       query_1       \n",
       "20 monocyte           rs1131017_RPS26          CEBPD       query_1       \n",
       "21 monocyte           rs1131017_RPS26          CEBPD       query_1       \n",
       "22 monocyte           rs1131017_RPS26          ELK1        query_1       \n",
       "23 monocyte           rs1131017_RPS26          ELK1        query_1       \n",
       "24 monocyte           rs1131017_RPS26          ELK1        query_1       \n",
       "25 monocyte           rs1131017_RPS26          FLI1        query_1       \n",
       "26 NK                 rs1131017_RPS26          CEBPB       query_1       \n",
       "27 NK                 rs1131017_RPS26          CEBPB       query_1       \n",
       "28 NK                 rs1131017_RPS26          CEBPB       query_1       \n",
       "29 NK                 rs1131017_RPS26          CEBPB       query_1       \n",
       "30 NK                 rs1131017_RPS26          CEBPD       query_1       \n",
       "31 NK                 rs1131017_RPS26          CEBPD       query_1       \n",
       "   TRANSFAC_significant TRANSFAC_p_value TRANSFAC_term_size TRANSFAC_query_size\n",
       "1  TRUE                 0.028703406       501                35                \n",
       "2  TRUE                 0.028742923      2503               191                \n",
       "3  TRUE                 0.041618610      1598               191                \n",
       "4  TRUE                 0.017941417      2826               191                \n",
       "5  TRUE                 0.041618610      1464               191                \n",
       "6  TRUE                 0.007142316      4467               191                \n",
       "7  TRUE                 0.024172382      2207               191                \n",
       "8  TRUE                 0.047267440      3060               191                \n",
       "9  TRUE                 0.041618610      2325               191                \n",
       "10 TRUE                 0.002470867      3285               191                \n",
       "11 TRUE                 0.007142316       732               191                \n",
       "12 TRUE                 0.004188494      1124               191                \n",
       "13 TRUE                 0.022537569      3025                20                \n",
       "14 TRUE                 0.036312693      3140               126                \n",
       "15 TRUE                 0.029926625      1416               126                \n",
       "16 TRUE                 0.009638736      2201               126                \n",
       "17 TRUE                 0.009638736      2479               126                \n",
       "18 TRUE                 0.022933498      1622               126                \n",
       "19 TRUE                 0.049065326      2296               126                \n",
       "20 TRUE                 0.026972058      1316               126                \n",
       "21 TRUE                 0.028679873      2564               126                \n",
       "22 TRUE                 0.028679873      4261               126                \n",
       "23 TRUE                 0.049065326      3932               126                \n",
       "24 TRUE                 0.044794472      2973               126                \n",
       "25 TRUE                 0.036045611      1951               126                \n",
       "26 TRUE                 0.006603932      1894                94                \n",
       "27 TRUE                 0.031154971      1076                94                \n",
       "28 TRUE                 0.016996993      1249                94                \n",
       "29 TRUE                 0.007729787      1684                94                \n",
       "30 TRUE                 0.033634713       981                94                \n",
       "31 TRUE                 0.030378248      1957                94                \n",
       "   TRANSFAC_intersection_size TRANSFAC_precision ⋯ ReMapTF.is.a.co.eGene.\n",
       "1   23                        0.6571429          ⋯ FALSE                 \n",
       "2   70                        0.3664921          ⋯ FALSE                 \n",
       "3   49                        0.2565445          ⋯ FALSE                 \n",
       "4   78                        0.4083770          ⋯ FALSE                 \n",
       "5   46                        0.2408377          ⋯ FALSE                 \n",
       "6  113                        0.5916230          ⋯ FALSE                 \n",
       "7   64                        0.3350785          ⋯ FALSE                 \n",
       "8   80                        0.4188482          ⋯ FALSE                 \n",
       "9   65                        0.3403141          ⋯ FALSE                 \n",
       "10  95                        0.4973822          ⋯ FALSE                 \n",
       "11  31                        0.1623037          ⋯ FALSE                 \n",
       "12  43                        0.2251309          ⋯ FALSE                 \n",
       "13  16                        0.8000000          ⋯ FALSE                 \n",
       "14  65                        0.5158730          ⋯ FALSE                 \n",
       "15  37                        0.2936508          ⋯ FALSE                 \n",
       "16  55                        0.4365079          ⋯ FALSE                 \n",
       "17  59                        0.4682540          ⋯ FALSE                 \n",
       "18  42                        0.3333333          ⋯ FALSE                 \n",
       "19  51                        0.4047619          ⋯ FALSE                 \n",
       "20  36                        0.2857143          ⋯ FALSE                 \n",
       "21  57                        0.4523810          ⋯ FALSE                 \n",
       "22  82                        0.6507937          ⋯ FALSE                 \n",
       "23  76                        0.6031746          ⋯ FALSE                 \n",
       "24  62                        0.4920635          ⋯ FALSE                 \n",
       "25  46                        0.3650794          ⋯ FALSE                 \n",
       "26  48                        0.5106383          ⋯ FALSE                 \n",
       "27  30                        0.3191489          ⋯ FALSE                 \n",
       "28  34                        0.3617021          ⋯ FALSE                 \n",
       "29  43                        0.4574468          ⋯ FALSE                 \n",
       "30  28                        0.2978723          ⋯ FALSE                 \n",
       "31  45                        0.4787234          ⋯ FALSE                 \n",
       "   ReMapenrichment.p.value ReMapX..TF.overlap...co.eGene\n",
       "1  3.034184e-06             34                          \n",
       "2  2.421193e-05            159                          \n",
       "3  2.421193e-05            159                          \n",
       "4  2.421193e-05            159                          \n",
       "5  7.264122e-05            133                          \n",
       "6  7.289311e-04             94                          \n",
       "7  9.006657e-03            131                          \n",
       "8  9.006657e-03            131                          \n",
       "9  9.006657e-03            131                          \n",
       "10 1.610430e-05             20                          \n",
       "11 1.610430e-05             20                          \n",
       "12 1.610430e-05             20                          \n",
       "13 2.865087e-03             17                          \n",
       "14 4.198463e-03            106                          \n",
       "15 4.198463e-03            106                          \n",
       "16 4.198463e-03            106                          \n",
       "17 4.198463e-03            106                          \n",
       "18 4.198463e-03            106                          \n",
       "19 1.611238e-05             97                          \n",
       "20 1.611238e-05             97                          \n",
       "21 1.611238e-05             97                          \n",
       "22 8.317368e-05             71                          \n",
       "23 8.317368e-05             71                          \n",
       "24 8.317368e-05             71                          \n",
       "25 4.430257e-04             97                          \n",
       "26 1.566936e-03             80                          \n",
       "27 1.566936e-03             80                          \n",
       "28 1.566936e-03             80                          \n",
       "29 1.566936e-03             80                          \n",
       "30 5.531751e-07             78                          \n",
       "31 5.531751e-07             78                          \n",
       "   ReMapX..TF.overlap...background ReMapX..no.TF.overlap...co.eGene\n",
       "1    1                             1096                            \n",
       "2   41                             7460                            \n",
       "3   41                             7460                            \n",
       "4   41                             7460                            \n",
       "5   67                             5970                            \n",
       "6  106                             4030                            \n",
       "7   69                             6433                            \n",
       "8   69                             6433                            \n",
       "9   69                             6433                            \n",
       "10 180                              372                            \n",
       "11 180                              372                            \n",
       "12 180                              372                            \n",
       "13   3                             5970                            \n",
       "14  26                             6655                            \n",
       "15  26                             6655                            \n",
       "16  26                             6655                            \n",
       "17  26                             6655                            \n",
       "18  26                             6655                            \n",
       "19  35                             5295                            \n",
       "20  35                             5295                            \n",
       "21  35                             5295                            \n",
       "22  61                             3550                            \n",
       "23  61                             3550                            \n",
       "24  61                             3550                            \n",
       "25  35                             5648                            \n",
       "26  16                             5050                            \n",
       "27  16                             5050                            \n",
       "28  16                             5050                            \n",
       "29  16                             5050                            \n",
       "30  18                             4147                            \n",
       "31  18                             4147                            \n",
       "   ReMapX..background.gene...not.co.eGene ReMapenrichment.fdr ReMapeQTL.SNP\n",
       "1    632                                  1.107477e-04        rs1131017    \n",
       "2   3833                                  2.877279e-04        rs1131017    \n",
       "3   3833                                  2.877279e-04        rs1131017    \n",
       "4   3833                                  2.877279e-04        rs1131017    \n",
       "5   5323                                  7.423933e-04        rs1131017    \n",
       "6   7263                                  5.173386e-03        rs1131017    \n",
       "7   4860                                  4.002088e-02        rs1131017    \n",
       "8   4860                                  4.002088e-02        rs1131017    \n",
       "9   4860                                  4.002088e-02        rs1131017    \n",
       "10 10921                                  2.083458e-04        rs1131017    \n",
       "11 10921                                  2.083458e-04        rs1131017    \n",
       "12 10921                                  2.083458e-04        rs1131017    \n",
       "13  5323                                  3.327408e-02        rs7605824    \n",
       "14  2899                                  1.968270e-02        rs1131017    \n",
       "15  2899                                  1.968270e-02        rs1131017    \n",
       "16  2899                                  1.968270e-02        rs1131017    \n",
       "17  2899                                  1.968270e-02        rs1131017    \n",
       "18  2899                                  1.968270e-02        rs1131017    \n",
       "19  4259                                  1.960339e-04        rs1131017    \n",
       "20  4259                                  1.960339e-04        rs1131017    \n",
       "21  4259                                  1.960339e-04        rs1131017    \n",
       "22  6004                                  8.019198e-04        rs1131017    \n",
       "23  6004                                  8.019198e-04        rs1131017    \n",
       "24  6004                                  8.019198e-04        rs1131017    \n",
       "25  3906                                  3.144252e-03        rs1131017    \n",
       "26  2217                                  8.428465e-03        rs1131017    \n",
       "27  2217                                  8.428465e-03        rs1131017    \n",
       "28  2217                                  8.428465e-03        rs1131017    \n",
       "29  2217                                  8.428465e-03        rs1131017    \n",
       "30  3120                                  8.833515e-06        rs1131017    \n",
       "31  3120                                  8.833515e-06        rs1131017    \n",
       "   ReMapSNP.overlaps.TF. ReMapNames.of.overlapping.SNPs\n",
       "1   TRUE                 rs1131017,rs7297175           \n",
       "2   TRUE                 rs7297175                     \n",
       "3   TRUE                 rs7297175                     \n",
       "4   TRUE                 rs7297175                     \n",
       "5   TRUE                 rs1131017,rs7297175           \n",
       "6   TRUE                 rs10876864                    \n",
       "7   TRUE                 rs1131017                     \n",
       "8   TRUE                 rs1131017                     \n",
       "9   TRUE                 rs1131017                     \n",
       "10 FALSE                                               \n",
       "11 FALSE                                               \n",
       "12 FALSE                                               \n",
       "13 FALSE                                               \n",
       "14  TRUE                 rs7297175                     \n",
       "15  TRUE                 rs7297175                     \n",
       "16  TRUE                 rs7297175                     \n",
       "17  TRUE                 rs7297175                     \n",
       "18  TRUE                 rs7297175                     \n",
       "19  TRUE                 rs1131017,rs7297175           \n",
       "20  TRUE                 rs1131017,rs7297175           \n",
       "21  TRUE                 rs1131017,rs7297175           \n",
       "22  TRUE                 rs10876864                    \n",
       "23  TRUE                 rs10876864                    \n",
       "24  TRUE                 rs10876864                    \n",
       "25  TRUE                 rs1131017                     \n",
       "26  TRUE                 rs7297175                     \n",
       "27  TRUE                 rs7297175                     \n",
       "28  TRUE                 rs7297175                     \n",
       "29  TRUE                 rs7297175                     \n",
       "30  TRUE                 rs1131017,rs7297175           \n",
       "31  TRUE                 rs1131017,rs7297175           "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "combined"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "id": "7ad1c204-f2d5-40ff-8f73-02871ed2a498",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the `combined` table (displayed in the previous cell) as a CSV file below `path`.\n",
    "# write.csv is called with its defaults, so row names are written as the first column.\n",
    "write.csv(\n",
    "    x = combined,\n",
    "    file = paste0(path, \"transfac_results/TRANSFAC_ReMap_matches.csv\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23af7863-6d69-4dd0-be60-8689305884dc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "4.1.1"
  },
  "toc-autonumbering": false
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
